summary refs log tree commit diff
diff options
context:
space:
mode:
author paulberg <paulberg> 2006-02-25 22:08:39 +0000
committer paulberg <paulberg> 2006-02-25 22:08:39 +0000
commit fc35039e97ae63e9542404f512fa126055ee4c67 (patch)
tree 9daf55e88bd7f1ad4b789bde75e5d007f2ef8f0d
parent 06ad12d8dad0b13befb85b44f7e10a5bb1408f44 (diff)
download libjpeg-fc35039e97ae63e9542404f512fa126055ee4c67.tar.gz
Initial commit of Vollbeding changes
-rw-r--r-- cdjpeg.h 5
-rw-r--r-- cjpeg.c 38
-rw-r--r-- jcapimin.c 4
-rw-r--r-- jccoefct.c 8
-rw-r--r-- jcdctmgr.c 289
-rw-r--r-- jcinit.c 6
-rw-r--r-- jcmainct.c 14
-rw-r--r-- jcmarker.c 13
-rw-r--r-- jcmaster.c 217
-rw-r--r-- jcparam.c 68
-rw-r--r-- jcprepct.c 14
-rw-r--r-- jcsample.c 94
-rw-r--r-- jctrans.c 2
-rw-r--r-- jdapistd.c 2
-rw-r--r-- jdcoefct.c 14
-rw-r--r-- jdct.h 239
-rw-r--r-- jddctmgr.c 125
-rw-r--r-- jdhuff.c 126
-rw-r--r-- jdinput.c 10
-rw-r--r-- jdmainct.c 42
-rw-r--r-- jdmaster.c 148
-rw-r--r-- jdsample.c 26
-rw-r--r-- jdtrans.c 6
-rw-r--r-- jerror.h 16
-rw-r--r-- jfdctflt.c 47
-rw-r--r-- jfdctfst.c 47
-rw-r--r-- jfdctint.c 4196
-rw-r--r-- jidctint.c 4674
-rw-r--r-- jmorecfg.h 7
-rw-r--r-- jpegint.h 2
-rw-r--r-- jpeglib.h 38
-rw-r--r-- jpegtran.c 94
-rw-r--r-- makefile.cfg 16
-rw-r--r-- rdswitch.c 39
-rw-r--r-- transupp.c 1039
-rw-r--r-- transupp.h 126
36 files changed, 11101 insertions, 750 deletions
diff --git a/cdjpeg.h b/cdjpeg.h
index 2b387b6..ed024ac 100644
--- a/cdjpeg.h
+++ b/cdjpeg.h
@@ -104,6 +104,7 @@ typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
#define jinit_write_targa jIWrTarga
#define read_quant_tables RdQTables
#define read_scan_script RdScnScript
+#define set_quality_ratings SetQRates
#define set_quant_slots SetQSlots
#define set_sample_factors SetSFacts
#define read_color_map RdCMap
@@ -131,8 +132,10 @@ EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
/* cjpeg support routines (in rdswitch.c) */
EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
- int scale_factor, boolean force_baseline));
+ boolean force_baseline));
EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
+EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
+ boolean force_baseline));
EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
diff --git a/cjpeg.c b/cjpeg.c
index f2a929f..b55ad6f 100644
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -149,7 +149,7 @@ usage (void)
#endif
fprintf(stderr, "Switches (names may be abbreviated):\n");
- fprintf(stderr, " -quality N Compression quality (0..100; 5-95 is useful range)\n");
+ fprintf(stderr, " -quality N[,...] Compression quality (0..100; 5-95 is useful range)\n");
fprintf(stderr, " -grayscale Create monochrome JPEG file\n");
#ifdef ENTROPY_OPT_SUPPORTED
fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n");
@@ -157,6 +157,9 @@ usage (void)
#ifdef C_PROGRESSIVE_SUPPORTED
fprintf(stderr, " -progressive Create progressive JPEG file\n");
#endif
+#ifdef DCT_SCALING_SUPPORTED
+ fprintf(stderr, " -scale M/N Scale image by fraction M/N, eg, 1/2\n");
+#endif
#ifdef TARGA_SUPPORTED
fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n");
#endif
@@ -209,21 +212,16 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
{
int argn;
char * arg;
- int quality; /* -quality parameter */
- int q_scale_factor; /* scaling percentage for -qtables */
boolean force_baseline;
boolean simple_progressive;
+ char * qualityarg = NULL; /* saves -quality parm if any */
char * qtablefile = NULL; /* saves -qtables filename if any */
char * qslotsarg = NULL; /* saves -qslots parm if any */
char * samplearg = NULL; /* saves -sample parm if any */
char * scansarg = NULL; /* saves -scans parm if any */
/* Set up default JPEG parameters. */
- /* Note that default -quality level need not, and does not,
- * match the default scaling for an explicit -qtables argument.
- */
- quality = 75; /* default -quality value */
- q_scale_factor = 100; /* default to no scaling for -qtables */
+
force_baseline = FALSE; /* by default, allow 16-bit quantizers */
simple_progressive = FALSE;
is_targa = FALSE;
@@ -328,13 +326,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
#endif
} else if (keymatch(arg, "quality", 1)) {
- /* Quality factor (quantization table scaling factor). */
+ /* Quality ratings (quantization table scaling factors). */
if (++argn >= argc) /* advance to next argument */
usage();
- if (sscanf(argv[argn], "%d", &quality) != 1)
- usage();
- /* Change scale factor in case -qtables is present. */
- q_scale_factor = jpeg_quality_scaling(quality);
+ qualityarg = argv[argn];
} else if (keymatch(arg, "qslots", 2)) {
/* Quantization table slot numbers. */
@@ -382,7 +377,15 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
* default sampling factors.
*/
- } else if (keymatch(arg, "scans", 2)) {
+ } else if (keymatch(arg, "scale", 4)) {
+ /* Scale the image by a fraction M/N. */
+ if (++argn >= argc) /* advance to next argument */
+ usage();
+ if (sscanf(argv[argn], "%d/%d",
+ &cinfo->scale_num, &cinfo->scale_denom) != 2)
+ usage();
+
+ } else if (keymatch(arg, "scans", 4)) {
/* Set scan script. */
#ifdef C_MULTISCAN_FILES_SUPPORTED
if (++argn >= argc) /* advance to next argument */
@@ -422,11 +425,12 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
/* Set quantization tables for selected quality. */
/* Some or all may be overridden if -qtables is present. */
- jpeg_set_quality(cinfo, quality, force_baseline);
+ if (qualityarg != NULL) /* process -quality if it was present */
+ if (! set_quality_ratings(cinfo, qualityarg, force_baseline))
+ usage();
if (qtablefile != NULL) /* process -qtables if it was present */
- if (! read_quant_tables(cinfo, qtablefile,
- q_scale_factor, force_baseline))
+ if (! read_quant_tables(cinfo, qtablefile, force_baseline))
usage();
if (qslotsarg != NULL) /* process -qslots if it was present */
diff --git a/jcapimin.c b/jcapimin.c
index 54fb8c5..563ab42 100644
--- a/jcapimin.c
+++ b/jcapimin.c
@@ -63,8 +63,10 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
cinfo->comp_info = NULL;
- for (i = 0; i < NUM_QUANT_TBLS; i++)
+ for (i = 0; i < NUM_QUANT_TBLS; i++) {
cinfo->quant_tbl_ptrs[i] = NULL;
+ cinfo->q_scale_factor[i] = 100;
+ }
for (i = 0; i < NUM_HUFF_TBLS; i++) {
cinfo->dc_huff_tbl_ptrs[i] = NULL;
diff --git a/jccoefct.c b/jccoefct.c
index 1963ddb..14ccaeb 100644
--- a/jccoefct.c
+++ b/jccoefct.c
@@ -170,7 +170,8 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
: compptr->last_col_width;
xpos = MCU_col_num * compptr->MCU_sample_width;
- ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+ ypos = yoffset * compptr->DCT_v_scaled_size;
+ /* ypos == (yoffset+yindex) * compptr->DCT_v_scaled_size */
for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
if (coef->iMCU_row_num < last_iMCU_row ||
yoffset+yindex < compptr->last_row_height) {
@@ -195,7 +196,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
}
}
blkn += compptr->MCU_width;
- ypos += DCTSIZE;
+ ypos += compptr->DCT_v_scaled_size;
}
}
/* Try to write the MCU. In event of a suspension failure, we will
@@ -281,7 +282,8 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
thisblockrow = buffer[block_row];
(*cinfo->fdct->forward_DCT) (cinfo, compptr,
input_buf[ci], thisblockrow,
- (JDIMENSION) (block_row * DCTSIZE),
+ (JDIMENSION) (block_row *
+ compptr->DCT_v_scaled_size),
(JDIMENSION) 0, blocks_across);
if (ndummy > 0) {
/* Create dummy blocks at the right edge of the image. */
diff --git a/jcdctmgr.c b/jcdctmgr.c
index 61fa79b..f8f5e76 100644
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -23,7 +23,7 @@ typedef struct {
struct jpeg_forward_dct pub; /* public fields */
/* Pointer to the DCT routine actually in use */
- forward_DCT_method_ptr do_dct;
+ forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
/* The actual post-DCT divisors --- not identical to the quant table
* entries, because of scaling (especially for an unnormalized DCT).
@@ -33,7 +33,7 @@ typedef struct {
#ifdef DCT_FLOAT_SUPPORTED
/* Same as above for the floating-point case. */
- float_DCT_method_ptr do_float_dct;
+ float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
#endif
} my_fdct_controller;
@@ -61,6 +61,223 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
+ /* Select the proper DCT routine for this component's scaling */
+ switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef DCT_SCALING_SUPPORTED
+ case ((1 << 8) + 1):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_1x1;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((2 << 8) + 2):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_2x2;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((3 << 8) + 3):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_3x3;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((4 << 8) + 4):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_4x4;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((5 << 8) + 5):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_5x5;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((6 << 8) + 6):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_6x6;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((7 << 8) + 7):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_7x7;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((9 << 8) + 9):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_9x9;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((10 << 8) + 10):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_10x10;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((11 << 8) + 11):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_11x11;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((12 << 8) + 12):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_12x12;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((13 << 8) + 13):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_13x13;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((14 << 8) + 14):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_14x14;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((15 << 8) + 15):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_15x15;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((16 << 8) + 16):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_16x16;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((16 << 8) + 8):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_16x8;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((14 << 8) + 7):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_14x7;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((12 << 8) + 6):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_12x6;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((10 << 8) + 5):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_10x5;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((8 << 8) + 4):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_8x4;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((6 << 8) + 3):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_6x3;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((4 << 8) + 2):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_4x2;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((2 << 8) + 1):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_2x1;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((8 << 8) + 16):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_8x16;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((7 << 8) + 14):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_7x14;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((6 << 8) + 12):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_6x12;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((5 << 8) + 10):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_5x10;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((4 << 8) + 8):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_4x8;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((3 << 8) + 6):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_3x6;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((2 << 8) + 4):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_2x4;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ case ((1 << 8) + 2):
+ if (cinfo->dct_method == JDCT_ISLOW)
+ fdct->do_dct[ci] = jpeg_fdct_1x2;
+ else
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+#endif
+ case ((DCTSIZE << 8) + DCTSIZE):
+ switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+ case JDCT_ISLOW:
+ fdct->do_dct[ci] = jpeg_fdct_islow;
+ break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+ case JDCT_IFAST:
+ fdct->do_dct[ci] = jpeg_fdct_ifast;
+ break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+ case JDCT_FLOAT:
+ fdct->do_float_dct[ci] = jpeg_fdct_float;
+ break;
+#endif
+ default:
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
+ break;
+ }
+ break;
+ default:
+ ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+ compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+ break;
+ }
qtblno = compptr->quant_tbl_no;
/* Make sure specified quantization table is present */
if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
@@ -185,43 +402,16 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
/* This routine is heavily used, so it's worth coding it tightly. */
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
- forward_DCT_method_ptr do_dct = fdct->do_dct;
+ forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
JDIMENSION bi;
sample_data += start_row; /* fold in the vertical offset once */
- for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
- /* Load data into workspace, applying unsigned->signed conversion */
- { register DCTELEM *workspaceptr;
- register JSAMPROW elemptr;
- register int elemr;
-
- workspaceptr = workspace;
- for (elemr = 0; elemr < DCTSIZE; elemr++) {
- elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8 /* unroll the inner loop */
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-#else
- { register int elemc;
- for (elemc = DCTSIZE; elemc > 0; elemc--) {
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- }
- }
-#endif
- }
- }
-
+ for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
/* Perform the DCT */
- (*do_dct) (workspace);
+ (*do_dct) (workspace, sample_data, start_col);
/* Quantize/descale the coefficients, and store into coef_blocks[] */
{ register DCTELEM temp, qval;
@@ -275,7 +465,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
/* This routine is heavily used, so it's worth coding it tightly. */
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
- float_DCT_method_ptr do_dct = fdct->do_float_dct;
+ float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
JDIMENSION bi;
@@ -283,36 +473,8 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
sample_data += start_row; /* fold in the vertical offset once */
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
- /* Load data into workspace, applying unsigned->signed conversion */
- { register FAST_FLOAT *workspaceptr;
- register JSAMPROW elemptr;
- register int elemr;
-
- workspaceptr = workspace;
- for (elemr = 0; elemr < DCTSIZE; elemr++) {
- elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8 /* unroll the inner loop */
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-#else
- { register int elemc;
- for (elemc = DCTSIZE; elemc > 0; elemc--) {
- *workspaceptr++ = (FAST_FLOAT)
- (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- }
- }
-#endif
- }
- }
-
/* Perform the DCT */
- (*do_dct) (workspace);
+ (*do_dct) (workspace, sample_data, start_col);
/* Quantize/descale the coefficients, and store into coef_blocks[] */
{ register FAST_FLOAT temp;
@@ -357,19 +519,16 @@ jinit_forward_dct (j_compress_ptr cinfo)
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
fdct->pub.forward_DCT = forward_DCT;
- fdct->do_dct = jpeg_fdct_islow;
break;
#endif
#ifdef DCT_IFAST_SUPPORTED
case JDCT_IFAST:
fdct->pub.forward_DCT = forward_DCT;
- fdct->do_dct = jpeg_fdct_ifast;
break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
case JDCT_FLOAT:
fdct->pub.forward_DCT = forward_DCT_float;
- fdct->do_float_dct = jpeg_fdct_float;
break;
#endif
default:
diff --git a/jcinit.c b/jcinit.c
index 5efffe3..a0f3e4f 100644
--- a/jcinit.c
+++ b/jcinit.c
@@ -41,9 +41,9 @@ jinit_compress_master (j_compress_ptr cinfo)
/* Forward DCT */
jinit_forward_dct(cinfo);
/* Entropy encoding: either Huffman or arithmetic coding. */
- if (cinfo->arith_code) {
- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
- } else {
+ if (cinfo->arith_code)
+ jinit_arith_encoder(cinfo);
+ else {
if (cinfo->progressive_mode) {
#ifdef C_PROGRESSIVE_SUPPORTED
jinit_phuff_encoder(cinfo);
diff --git a/jcmainct.c b/jcmainct.c
index e0279a7..7de75d1 100644
--- a/jcmainct.c
+++ b/jcmainct.c
@@ -118,17 +118,17 @@ process_data_simple_main (j_compress_ptr cinfo,
while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
/* Read input data if we haven't filled the main buffer yet */
- if (main->rowgroup_ctr < DCTSIZE)
+ if (main->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
(*cinfo->prep->pre_process_data) (cinfo,
input_buf, in_row_ctr, in_rows_avail,
main->buffer, &main->rowgroup_ctr,
- (JDIMENSION) DCTSIZE);
+ (JDIMENSION) cinfo->min_DCT_v_scaled_size);
/* If we don't have a full iMCU row buffered, return to application for
* more data. Note that preprocessor will always pad to fill the iMCU row
* at the bottom of the image.
*/
- if (main->rowgroup_ctr != DCTSIZE)
+ if (main->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
return;
/* Send the completed row to the compressor */
@@ -269,10 +269,10 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
ci++, compptr++) {
main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
- compptr->width_in_blocks * DCTSIZE,
+ compptr->width_in_blocks * compptr->DCT_h_scaled_size,
(JDIMENSION) jround_up((long) compptr->height_in_blocks,
(long) compptr->v_samp_factor) * DCTSIZE,
- (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+ (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
}
#else
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@@ -286,8 +286,8 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
ci++, compptr++) {
main->buffer[ci] = (*cinfo->mem->alloc_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE,
- compptr->width_in_blocks * DCTSIZE,
- (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+ compptr->width_in_blocks * compptr->DCT_h_scaled_size,
+ (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
}
}
}
diff --git a/jcmarker.c b/jcmarker.c
index 3d1e6c6..54d109a 100644
--- a/jcmarker.c
+++ b/jcmarker.c
@@ -285,13 +285,13 @@ emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
/* Make sure image isn't bigger than SOF field can handle */
- if ((long) cinfo->image_height > 65535L ||
- (long) cinfo->image_width > 65535L)
+ if ((long) cinfo->jpeg_height > 65535L ||
+ (long) cinfo->jpeg_width > 65535L)
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
emit_byte(cinfo, cinfo->data_precision);
- emit_2bytes(cinfo, (int) cinfo->image_height);
- emit_2bytes(cinfo, (int) cinfo->image_width);
+ emit_2bytes(cinfo, (int) cinfo->jpeg_height);
+ emit_2bytes(cinfo, (int) cinfo->jpeg_width);
emit_byte(cinfo, cinfo->num_components);
@@ -529,7 +529,10 @@ write_frame_header (j_compress_ptr cinfo)
/* Emit the proper SOF marker */
if (cinfo->arith_code) {
- emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */
+ if (cinfo->progressive_mode)
+ emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
+ else
+ emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */
} else {
if (cinfo->progressive_mode)
emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */
diff --git a/jcmaster.c b/jcmaster.c
index aab4020..72b3769 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -42,23 +42,172 @@ typedef my_comp_master * my_master_ptr;
* Support routines that do various essential calculations.
*/
+/*
+ * Compute JPEG image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+#ifdef DCT_SCALING_SUPPORTED
+
+ /* Compute actual JPEG image dimensions and DCT scaling choices. */
+ if (cinfo->scale_num >= cinfo->scale_denom * 8) {
+ /* Provide 8/1 scaling */
+ cinfo->jpeg_width = cinfo->image_width << 3;
+ cinfo->jpeg_height = cinfo->image_height << 3;
+ cinfo->min_DCT_h_scaled_size = 1;
+ cinfo->min_DCT_v_scaled_size = 1;
+ } else if (cinfo->scale_num >= cinfo->scale_denom * 4) {
+ /* Provide 4/1 scaling */
+ cinfo->jpeg_width = cinfo->image_width << 2;
+ cinfo->jpeg_height = cinfo->image_height << 2;
+ cinfo->min_DCT_h_scaled_size = 2;
+ cinfo->min_DCT_v_scaled_size = 2;
+ } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * 8) {
+ /* Provide 8/3 scaling */
+ cinfo->jpeg_width = (cinfo->image_width << 1) +
+ jdiv_round_up((long) cinfo->image_width * 2, 3L);
+ cinfo->jpeg_height = (cinfo->image_height << 1) +
+ jdiv_round_up((long) cinfo->image_height * 2, 3L);
+ cinfo->min_DCT_h_scaled_size = 3;
+ cinfo->min_DCT_v_scaled_size = 3;
+ } else if (cinfo->scale_num >= cinfo->scale_denom * 2) {
+ /* Provide 2/1 scaling */
+ cinfo->jpeg_width = cinfo->image_width << 1;
+ cinfo->jpeg_height = cinfo->image_height << 1;
+ cinfo->min_DCT_h_scaled_size = 4;
+ cinfo->min_DCT_v_scaled_size = 4;
+ } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * 8) {
+ /* Provide 8/5 scaling */
+ cinfo->jpeg_width = cinfo->image_width +
+ jdiv_round_up((long) cinfo->image_width * 3, 5L);
+ cinfo->jpeg_height = cinfo->image_height +
+ jdiv_round_up((long) cinfo->image_height * 3, 5L);
+ cinfo->min_DCT_h_scaled_size = 5;
+ cinfo->min_DCT_v_scaled_size = 5;
+ } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * 4) {
+ /* Provide 4/3 scaling */
+ cinfo->jpeg_width = cinfo->image_width +
+ jdiv_round_up((long) cinfo->image_width, 3L);
+ cinfo->jpeg_height = cinfo->image_height +
+ jdiv_round_up((long) cinfo->image_height, 3L);
+ cinfo->min_DCT_h_scaled_size = 6;
+ cinfo->min_DCT_v_scaled_size = 6;
+ } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * 8) {
+ /* Provide 8/7 scaling */
+ cinfo->jpeg_width = cinfo->image_width +
+ jdiv_round_up((long) cinfo->image_width, 7L);
+ cinfo->jpeg_height = cinfo->image_height +
+ jdiv_round_up((long) cinfo->image_height, 7L);
+ cinfo->min_DCT_h_scaled_size = 7;
+ cinfo->min_DCT_v_scaled_size = 7;
+ } else if (cinfo->scale_num >= cinfo->scale_denom) {
+ /* Provide 1/1 scaling */
+ cinfo->jpeg_width = cinfo->image_width;
+ cinfo->jpeg_height = cinfo->image_height;
+ cinfo->min_DCT_h_scaled_size = DCTSIZE;
+ cinfo->min_DCT_v_scaled_size = DCTSIZE;
+ } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * 8) {
+ /* Provide 8/9 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 8, 9L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 8, 9L);
+ cinfo->min_DCT_h_scaled_size = 9;
+ cinfo->min_DCT_v_scaled_size = 9;
+ } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * 4) {
+ /* Provide 4/5 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 4, 5L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 4, 5L);
+ cinfo->min_DCT_h_scaled_size = 10;
+ cinfo->min_DCT_v_scaled_size = 10;
+ } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * 8) {
+ /* Provide 8/11 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 8, 11L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 8, 11L);
+ cinfo->min_DCT_h_scaled_size = 11;
+ cinfo->min_DCT_v_scaled_size = 11;
+ } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * 2) {
+ /* Provide 2/3 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 2, 3L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 2, 3L);
+ cinfo->min_DCT_h_scaled_size = 12;
+ cinfo->min_DCT_v_scaled_size = 12;
+ } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * 8) {
+ /* Provide 8/13 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 8, 13L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 8, 13L);
+ cinfo->min_DCT_h_scaled_size = 13;
+ cinfo->min_DCT_v_scaled_size = 13;
+ } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * 4) {
+ /* Provide 4/7 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 4, 7L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 4, 7L);
+ cinfo->min_DCT_h_scaled_size = 14;
+ cinfo->min_DCT_v_scaled_size = 14;
+ } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * 8) {
+ /* Provide 8/15 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 8, 15L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 8, 15L);
+ cinfo->min_DCT_h_scaled_size = 15;
+ cinfo->min_DCT_v_scaled_size = 15;
+ } else {
+ /* Provide 1/2 scaling */
+ cinfo->jpeg_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width, 2L);
+ cinfo->jpeg_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height, 2L);
+ cinfo->min_DCT_h_scaled_size = 16;
+ cinfo->min_DCT_v_scaled_size = 16;
+ }
+
+#else /* !DCT_SCALING_SUPPORTED */
+
+ /* Hardwire it to "no scaling" */
+ cinfo->jpeg_width = cinfo->image_width;
+ cinfo->jpeg_height = cinfo->image_height;
+ cinfo->min_DCT_h_scaled_size = DCTSIZE;
+ cinfo->min_DCT_v_scaled_size = DCTSIZE;
+
+#endif /* DCT_SCALING_SUPPORTED */
+}
+
+
LOCAL(void)
initial_setup (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
- int ci;
+ int ci, ssize;
jpeg_component_info *compptr;
long samplesperrow;
JDIMENSION jd_samplesperrow;
+ jpeg_calc_jpeg_dimensions(cinfo);
+
/* Sanity check on image dimensions */
- if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+ if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0
|| cinfo->num_components <= 0 || cinfo->input_components <= 0)
ERREXIT(cinfo, JERR_EMPTY_IMAGE);
/* Make sure image isn't bigger than I can handle */
- if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
- (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+ if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
+ (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
/* Width of an input scanline must be representable as JDIMENSION. */
@@ -95,22 +244,52 @@ initial_setup (j_compress_ptr cinfo)
ci++, compptr++) {
/* Fill in the correct component_index value; don't rely on application */
compptr->component_index = ci;
- /* For compression, we never do DCT scaling. */
- compptr->DCT_scaled_size = DCTSIZE;
+ /* In selecting the actual DCT scaling for each component, we try to
+ * scale down the chroma components via DCT scaling rather than downsampling.
+ * This saves time if the downsampler gets to use 1:1 scaling.
+ * Note this code adapts subsampling ratios which are powers of 2.
+ */
+ ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+ while (cinfo->dct_method == JDCT_ISLOW &&
+ cinfo->min_DCT_h_scaled_size * ssize <= DCTSIZE &&
+ (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
+ ssize = ssize * 2;
+ }
+#endif
+ compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+ ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+ while (cinfo->dct_method == JDCT_ISLOW &&
+ cinfo->min_DCT_v_scaled_size * ssize <= DCTSIZE &&
+ (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
+ ssize = ssize * 2;
+ }
+#endif
+ compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+ /* We don't support DCT ratios larger than 2. */
+ if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+ else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+ compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+
/* Size in DCT blocks */
compptr->width_in_blocks = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+ jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
(long) (cinfo->max_h_samp_factor * DCTSIZE));
compptr->height_in_blocks = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+ jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
(long) (cinfo->max_v_samp_factor * DCTSIZE));
/* Size in samples */
compptr->downsampled_width = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
- (long) cinfo->max_h_samp_factor);
+ jdiv_round_up((long) cinfo->jpeg_width *
+ (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+ (long) (cinfo->max_h_samp_factor * DCTSIZE));
compptr->downsampled_height = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
- (long) cinfo->max_v_samp_factor);
+ jdiv_round_up((long) cinfo->jpeg_height *
+ (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+ (long) (cinfo->max_v_samp_factor * DCTSIZE));
/* Mark component needed (this flag isn't actually used for compression) */
compptr->component_needed = TRUE;
}
@@ -119,7 +298,7 @@ initial_setup (j_compress_ptr cinfo)
* main controller will call coefficient controller).
*/
cinfo->total_iMCU_rows = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_height,
+ jdiv_round_up((long) cinfo->jpeg_height,
(long) (cinfo->max_v_samp_factor*DCTSIZE));
}
@@ -325,7 +504,7 @@ per_scan_setup (j_compress_ptr cinfo)
compptr->MCU_width = 1;
compptr->MCU_height = 1;
compptr->MCU_blocks = 1;
- compptr->MCU_sample_width = DCTSIZE;
+ compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
compptr->last_col_width = 1;
/* For noninterleaved scans, it is convenient to define last_row_height
* as the number of block rows present in the last iMCU row.
@@ -347,10 +526,10 @@ per_scan_setup (j_compress_ptr cinfo)
/* Overall image size in MCUs */
cinfo->MCUs_per_row = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_width,
+ jdiv_round_up((long) cinfo->jpeg_width,
(long) (cinfo->max_h_samp_factor*DCTSIZE));
cinfo->MCU_rows_in_scan = (JDIMENSION)
- jdiv_round_up((long) cinfo->image_height,
+ jdiv_round_up((long) cinfo->jpeg_height,
(long) (cinfo->max_v_samp_factor*DCTSIZE));
cinfo->blocks_in_MCU = 0;
@@ -361,7 +540,7 @@ per_scan_setup (j_compress_ptr cinfo)
compptr->MCU_width = compptr->h_samp_factor;
compptr->MCU_height = compptr->v_samp_factor;
compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
- compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
+ compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
/* Figure number of non-dummy blocks in last MCU column & row */
tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
if (tmp == 0) tmp = compptr->MCU_width;
@@ -433,7 +612,7 @@ prepare_for_pass (j_compress_ptr cinfo)
/* Do Huffman optimization for a scan after the first one. */
select_scan_parameters(cinfo);
per_scan_setup(cinfo);
- if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
+ if (cinfo->Ss != 0 || cinfo->Ah == 0) {
(*cinfo->entropy->start_pass) (cinfo, TRUE);
(*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
master->pub.call_pass_startup = FALSE;
@@ -567,7 +746,7 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
cinfo->num_scans = 1;
}
- if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */
+ if (cinfo->progressive_mode && cinfo->arith_code == 0) /* TEMPORARY HACK ??? */
cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
/* Initialize my private state */
diff --git a/jcparam.c b/jcparam.c
index 6fc48f5..6bef48d 100644
--- a/jcparam.c
+++ b/jcparam.c
@@ -60,6 +60,47 @@ jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
}
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+ 16, 11, 10, 16, 24, 40, 51, 61,
+ 12, 12, 14, 19, 26, 58, 60, 55,
+ 14, 13, 16, 24, 40, 57, 69, 56,
+ 14, 17, 22, 29, 51, 87, 80, 62,
+ 18, 22, 37, 56, 68, 109, 103, 77,
+ 24, 35, 55, 64, 81, 104, 113, 92,
+ 49, 64, 78, 87, 103, 121, 120, 101,
+ 72, 92, 95, 98, 112, 100, 103, 99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+ 17, 18, 24, 47, 99, 99, 99, 99,
+ 18, 21, 26, 66, 99, 99, 99, 99,
+ 24, 26, 56, 99, 99, 99, 99, 99,
+ 47, 66, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99
+};
+
+
+GLOBAL(void)
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+/* Install the standard luminance (slot 0) and chrominance (slot 1) tables,
+ * each scaled by the percentage in cinfo->q_scale_factor[0] and [1], so the
+ * caller can use different scalings for luminance and chrominance.
+ */
+{
+ /* Set up two quantization tables using the specified scaling */
+ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+ cinfo->q_scale_factor[0], force_baseline);
+ jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+ cinfo->q_scale_factor[1], force_baseline);
+}
+
+
GLOBAL(void)
jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
boolean force_baseline)
@@ -69,31 +110,6 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
* applications that insist on a linear percentage scaling.
*/
{
- /* These are the sample quantization tables given in JPEG spec section K.1.
- * The spec says that the values given produce "good" quality, and
- * when divided by 2, "very good" quality.
- */
- static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
- 16, 11, 10, 16, 24, 40, 51, 61,
- 12, 12, 14, 19, 26, 58, 60, 55,
- 14, 13, 16, 24, 40, 57, 69, 56,
- 14, 17, 22, 29, 51, 87, 80, 62,
- 18, 22, 37, 56, 68, 109, 103, 77,
- 24, 35, 55, 64, 81, 104, 113, 92,
- 49, 64, 78, 87, 103, 121, 120, 101,
- 72, 92, 95, 98, 112, 100, 103, 99
- };
- static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
- 17, 18, 24, 47, 99, 99, 99, 99,
- 18, 21, 26, 66, 99, 99, 99, 99,
- 24, 26, 56, 99, 99, 99, 99, 99,
- 47, 66, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
- };
-
/* Set up two quantization tables using the specified scaling */
jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
scale_factor, force_baseline);
@@ -284,6 +300,8 @@ jpeg_set_defaults (j_compress_ptr cinfo)
/* Initialize everything not dependent on the color space */
+ cinfo->scale_num = 1; /* 1:1 scaling */
+ cinfo->scale_denom = 1;
cinfo->data_precision = BITS_IN_JSAMPLE;
/* Set up two quantization tables using default quality of 75 */
jpeg_set_quality(cinfo, 75, TRUE);
diff --git a/jcprepct.c b/jcprepct.c
index fa93333..be44cc4 100644
--- a/jcprepct.c
+++ b/jcprepct.c
@@ -173,10 +173,12 @@ pre_process_data (j_compress_ptr cinfo,
*out_row_group_ctr < out_row_groups_avail) {
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
+ numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size;
expand_bottom_edge(output_buf[ci],
- compptr->width_in_blocks * DCTSIZE,
- (int) (*out_row_group_ctr * compptr->v_samp_factor),
- (int) (out_row_groups_avail * compptr->v_samp_factor));
+ compptr->width_in_blocks * compptr->DCT_h_scaled_size,
+ (int) (*out_row_group_ctr * numrows),
+ (int) (out_row_groups_avail * numrows));
}
*out_row_group_ctr = out_row_groups_avail;
break; /* can exit outer loop without test */
@@ -288,7 +290,8 @@ create_context_buffer (j_compress_ptr cinfo)
*/
true_buffer = (*cinfo->mem->alloc_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE,
- (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+ (JDIMENSION) (((long) compptr->width_in_blocks *
+ cinfo->min_DCT_h_scaled_size *
cinfo->max_h_samp_factor) / compptr->h_samp_factor),
(JDIMENSION) (3 * rgroup_height));
/* Copy true buffer row pointers into the middle of the fake row array */
@@ -346,7 +349,8 @@ jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
ci++, compptr++) {
prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE,
- (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+ (JDIMENSION) (((long) compptr->width_in_blocks *
+ cinfo->min_DCT_h_scaled_size *
cinfo->max_h_samp_factor) / compptr->h_samp_factor),
(JDIMENSION) cinfo->max_v_samp_factor);
}
diff --git a/jcsample.c b/jcsample.c
index 212ec87..4d36f85 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -62,6 +62,15 @@ typedef struct {
/* Downsampling method pointers, one per component */
downsample1_ptr methods[MAX_COMPONENTS];
+
+ /* Height of an output row group for each component. */
+ int rowgroup_height[MAX_COMPONENTS];
+
+ /* These arrays save pixel expansion factors so that int_downsample need not
+ * recompute them each time. They are unused for other downsampling methods.
+ */
+ UINT8 h_expand[MAX_COMPONENTS];
+ UINT8 v_expand[MAX_COMPONENTS];
} my_downsampler;
typedef my_downsampler * my_downsample_ptr;
@@ -123,7 +132,8 @@ sep_downsample (j_compress_ptr cinfo,
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
in_ptr = input_buf[ci] + in_row_index;
- out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
+ out_ptr = output_buf[ci] +
+ (out_row_group_index * downsample->rowgroup_height[ci]);
(*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
}
}
@@ -140,14 +150,15 @@ METHODDEF(void)
int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */
- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+ JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
JSAMPROW inptr, outptr;
INT32 outvalue;
- h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
- v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
+ h_expand = downsample->h_expand[compptr->component_index];
+ v_expand = downsample->v_expand[compptr->component_index];
numpix = h_expand * v_expand;
numpix2 = numpix/2;
@@ -158,8 +169,8 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
expand_right_edge(input_data, cinfo->max_v_samp_factor,
cinfo->image_width, output_cols * h_expand);
- inrow = 0;
- for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+ inrow = outrow = 0;
+ while (inrow < cinfo->max_v_samp_factor) {
outptr = output_data[outrow];
for (outcol = 0, outcol_h = 0; outcol < output_cols;
outcol++, outcol_h += h_expand) {
@@ -173,6 +184,7 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
*outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
}
inrow += v_expand;
+ outrow++;
}
}
@@ -191,8 +203,8 @@ fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
jcopy_sample_rows(input_data, 0, output_data, 0,
cinfo->max_v_samp_factor, cinfo->image_width);
/* Edge-expand */
- expand_right_edge(output_data, cinfo->max_v_samp_factor,
- cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
+ expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
+ compptr->width_in_blocks * compptr->DCT_h_scaled_size);
}
@@ -212,9 +224,9 @@ METHODDEF(void)
h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
- int outrow;
+ int inrow;
JDIMENSION outcol;
- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+ JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
register JSAMPROW inptr, outptr;
register int bias;
@@ -225,9 +237,9 @@ h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
expand_right_edge(input_data, cinfo->max_v_samp_factor,
cinfo->image_width, output_cols * 2);
- for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
- outptr = output_data[outrow];
- inptr = input_data[outrow];
+ for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+ outptr = output_data[inrow];
+ inptr = input_data[inrow];
bias = 0; /* bias = 0,1,0,1,... for successive samples */
for (outcol = 0; outcol < output_cols; outcol++) {
*outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
@@ -251,7 +263,7 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
int inrow, outrow;
JDIMENSION outcol;
- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+ JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
register JSAMPROW inptr0, inptr1, outptr;
register int bias;
@@ -262,8 +274,8 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
expand_right_edge(input_data, cinfo->max_v_samp_factor,
cinfo->image_width, output_cols * 2);
- inrow = 0;
- for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+ inrow = outrow = 0;
+ while (inrow < cinfo->max_v_samp_factor) {
outptr = output_data[outrow];
inptr0 = input_data[inrow];
inptr1 = input_data[inrow+1];
@@ -276,6 +288,7 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
inptr0 += 2; inptr1 += 2;
}
inrow += 2;
+ outrow++;
}
}
@@ -294,7 +307,7 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
int inrow, outrow;
JDIMENSION colctr;
- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+ JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
INT32 membersum, neighsum, memberscale, neighscale;
@@ -321,8 +334,8 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
- inrow = 0;
- for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+ inrow = outrow = 0;
+ while (inrow < cinfo->max_v_samp_factor) {
outptr = output_data[outrow];
inptr0 = input_data[inrow];
inptr1 = input_data[inrow+1];
@@ -378,6 +391,7 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
*outptr = (JSAMPLE) ((membersum + 32768) >> 16);
inrow += 2;
+ outrow++;
}
}
@@ -392,9 +406,9 @@ METHODDEF(void)
fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
- int outrow;
+ int inrow;
JDIMENSION colctr;
- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+ JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
register JSAMPROW inptr, above_ptr, below_ptr, outptr;
INT32 membersum, neighsum, memberscale, neighscale;
int colsum, lastcolsum, nextcolsum;
@@ -415,11 +429,11 @@ fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
- for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
- outptr = output_data[outrow];
- inptr = input_data[outrow];
- above_ptr = input_data[outrow-1];
- below_ptr = input_data[outrow+1];
+ for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+ outptr = output_data[inrow];
+ inptr = input_data[inrow];
+ above_ptr = input_data[inrow-1];
+ below_ptr = input_data[inrow+1];
/* Special case for first column */
colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
@@ -467,6 +481,7 @@ jinit_downsampler (j_compress_ptr cinfo)
int ci;
jpeg_component_info * compptr;
boolean smoothok = TRUE;
+ int h_in_group, v_in_group, h_out_group, v_out_group;
downsample = (my_downsample_ptr)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -482,8 +497,17 @@ jinit_downsampler (j_compress_ptr cinfo)
/* Verify we can handle the sampling factors, and set up method pointers */
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
- compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+ /* Compute size of an "output group" for DCT scaling. This many samples
+ * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
+ */
+ h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+ cinfo->min_DCT_h_scaled_size;
+ v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size;
+ h_in_group = cinfo->max_h_samp_factor;
+ v_in_group = cinfo->max_v_samp_factor;
+ downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
+ if (h_in_group == h_out_group && v_in_group == v_out_group) {
#ifdef INPUT_SMOOTHING_SUPPORTED
if (cinfo->smoothing_factor) {
downsample->methods[ci] = fullsize_smooth_downsample;
@@ -491,12 +515,12 @@ jinit_downsampler (j_compress_ptr cinfo)
} else
#endif
downsample->methods[ci] = fullsize_downsample;
- } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
- compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+ } else if (h_in_group == h_out_group * 2 &&
+ v_in_group == v_out_group) {
smoothok = FALSE;
downsample->methods[ci] = h2v1_downsample;
- } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
- compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+ } else if (h_in_group == h_out_group * 2 &&
+ v_in_group == v_out_group * 2) {
#ifdef INPUT_SMOOTHING_SUPPORTED
if (cinfo->smoothing_factor) {
downsample->methods[ci] = h2v2_smooth_downsample;
@@ -504,10 +528,12 @@ jinit_downsampler (j_compress_ptr cinfo)
} else
#endif
downsample->methods[ci] = h2v2_downsample;
- } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
- (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+ } else if ((h_in_group % h_out_group) == 0 &&
+ (v_in_group % v_out_group) == 0) {
smoothok = FALSE;
downsample->methods[ci] = int_downsample;
+ downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
+ downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
} else
ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
}
diff --git a/jctrans.c b/jctrans.c
index 0e6d707..9753a43 100644
--- a/jctrans.c
+++ b/jctrans.c
@@ -167,7 +167,7 @@ transencode_master_selection (j_compress_ptr cinfo,
/* Entropy encoding: either Huffman or arithmetic coding. */
if (cinfo->arith_code) {
- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+ jinit_arith_encoder(cinfo);
} else {
if (cinfo->progressive_mode) {
#ifdef C_PROGRESSIVE_SUPPORTED
diff --git a/jdapistd.c b/jdapistd.c
index c8e3fa0..9d74537 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -202,7 +202,7 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
}
/* Verify that at least one iMCU row can be returned. */
- lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
+ lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
if (max_lines < lines_per_iMCU_row)
ERREXIT(cinfo, JERR_BUFFER_SIZE);
diff --git a/jdcoefct.c b/jdcoefct.c
index 4938d20..462e92c 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -187,7 +187,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
: compptr->last_col_width;
output_ptr = output_buf[compptr->component_index] +
- yoffset * compptr->DCT_scaled_size;
+ yoffset * compptr->DCT_v_scaled_size;
start_col = MCU_col_num * compptr->MCU_sample_width;
for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
if (cinfo->input_iMCU_row < last_iMCU_row ||
@@ -197,11 +197,11 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
(*inverse_DCT) (cinfo, compptr,
(JCOEFPTR) coef->MCU_buffer[blkn+xindex],
output_ptr, output_col);
- output_col += compptr->DCT_scaled_size;
+ output_col += compptr->DCT_h_scaled_size;
}
}
blkn += compptr->MCU_width;
- output_ptr += compptr->DCT_scaled_size;
+ output_ptr += compptr->DCT_v_scaled_size;
}
}
}
@@ -362,9 +362,9 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
output_ptr, output_col);
buffer_ptr++;
- output_col += compptr->DCT_scaled_size;
+ output_col += compptr->DCT_h_scaled_size;
}
- output_ptr += compptr->DCT_scaled_size;
+ output_ptr += compptr->DCT_v_scaled_size;
}
}
@@ -654,9 +654,9 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
DC4 = DC5; DC5 = DC6;
DC7 = DC8; DC8 = DC9;
buffer_ptr++, prev_block_row++, next_block_row++;
- output_col += compptr->DCT_scaled_size;
+ output_col += compptr->DCT_h_scaled_size;
}
- output_ptr += compptr->DCT_scaled_size;
+ output_ptr += compptr->DCT_v_scaled_size;
}
}
diff --git a/jdct.h b/jdct.h
index 04192a2..360dec8 100644
--- a/jdct.h
+++ b/jdct.h
@@ -14,11 +14,16 @@
/*
- * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
- * the DCT is to be performed in-place in that buffer. Type DCTELEM is int
- * for 8-bit samples, INT32 for 12-bit samples. (NOTE: Floating-point DCT
- * implementations use an array of type FAST_FLOAT, instead.)
- * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
+ * A forward DCT routine is given a pointer to an input sample array and
+ * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
+ * in-place in that buffer. Type DCTELEM is int for 8-bit samples, INT32
+ * for 12-bit samples. (NOTE: Floating-point DCT implementations use an
+ * array of type FAST_FLOAT, instead.)
+ * The input data is to be fetched from the sample array starting at a
+ * specified column. (Any row offset needed will be applied to the array
+ * pointer before it is passed to the FDCT code.)
+ * Note that the number of samples fetched by the FDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
* The DCT outputs are returned scaled up by a factor of 8; they therefore
* have a range of +-8K for 8-bit data, +-128K for 12-bit data. This
* convention improves accuracy in integer implementations and saves some
@@ -32,8 +37,12 @@ typedef int DCTELEM; /* 16 or 32 bits is fine */
typedef INT32 DCTELEM; /* must have 32 bits */
#endif
-typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
-typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
+ JSAMPARRAY sample_data,
+ JDIMENSION start_col));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
+ JSAMPARRAY sample_data,
+ JDIMENSION start_col));
/*
@@ -44,7 +53,7 @@ typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
* sample array starting at a specified column. (Any row offset needed will
* be applied to the array pointer before it is passed to the IDCT code.)
* Note that the number of samples emitted by the IDCT routine is
- * DCT_scaled_size * DCT_scaled_size.
+ * DCT_h_scaled_size * DCT_v_scaled_size.
*/
/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
@@ -84,19 +93,143 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
#define jpeg_fdct_islow jFDislow
#define jpeg_fdct_ifast jFDifast
#define jpeg_fdct_float jFDfloat
+#define jpeg_fdct_7x7 jFD7x7
+#define jpeg_fdct_6x6 jFD6x6
+#define jpeg_fdct_5x5 jFD5x5
+#define jpeg_fdct_4x4 jFD4x4
+#define jpeg_fdct_3x3 jFD3x3
+#define jpeg_fdct_2x2 jFD2x2
+#define jpeg_fdct_1x1 jFD1x1
+#define jpeg_fdct_9x9 jFD9x9
+#define jpeg_fdct_10x10 jFD10x10
+#define jpeg_fdct_11x11 jFD11x11
+#define jpeg_fdct_12x12 jFD12x12
+#define jpeg_fdct_13x13 jFD13x13
+#define jpeg_fdct_14x14 jFD14x14
+#define jpeg_fdct_15x15 jFD15x15
+#define jpeg_fdct_16x16 jFD16x16
+#define jpeg_fdct_16x8 jFD16x8
+#define jpeg_fdct_14x7 jFD14x7
+#define jpeg_fdct_12x6 jFD12x6
+#define jpeg_fdct_10x5 jFD10x5
+#define jpeg_fdct_8x4 jFD8x4
+#define jpeg_fdct_6x3 jFD6x3
+#define jpeg_fdct_4x2 jFD4x2
+#define jpeg_fdct_2x1 jFD2x1
+#define jpeg_fdct_8x16 jFD8x16
+#define jpeg_fdct_7x14 jFD7x14
+#define jpeg_fdct_6x12 jFD6x12
+#define jpeg_fdct_5x10 jFD5x10
+#define jpeg_fdct_4x8 jFD4x8
+#define jpeg_fdct_3x6 jFD3x6
+#define jpeg_fdct_2x4 jFD2x4
+#define jpeg_fdct_1x2 jFD1x2
#define jpeg_idct_islow jRDislow
#define jpeg_idct_ifast jRDifast
#define jpeg_idct_float jRDfloat
+#define jpeg_idct_7x7 jRD7x7
+#define jpeg_idct_6x6 jRD6x6
+#define jpeg_idct_5x5 jRD5x5
#define jpeg_idct_4x4 jRD4x4
+#define jpeg_idct_3x3 jRD3x3
#define jpeg_idct_2x2 jRD2x2
#define jpeg_idct_1x1 jRD1x1
+#define jpeg_idct_9x9 jRD9x9
+#define jpeg_idct_10x10 jRD10x10
+#define jpeg_idct_11x11 jRD11x11
+#define jpeg_idct_12x12 jRD12x12
+#define jpeg_idct_13x13 jRD13x13
+#define jpeg_idct_14x14 jRD14x14
+#define jpeg_idct_15x15 jRD15x15
+#define jpeg_idct_16x16 jRD16x16
+#define jpeg_idct_16x8 jRD16x8
+#define jpeg_idct_14x7 jRD14x7
+#define jpeg_idct_12x6 jRD12x6
+#define jpeg_idct_10x5 jRD10x5
+#define jpeg_idct_8x4 jRD8x4
+#define jpeg_idct_6x3 jRD6x3
+#define jpeg_idct_4x2 jRD4x2
+#define jpeg_idct_2x1 jRD2x1
+#define jpeg_idct_8x16 jRD8x16
+#define jpeg_idct_7x14 jRD7x14
+#define jpeg_idct_6x12 jRD6x12
+#define jpeg_idct_5x10 jRD5x10
+#define jpeg_idct_4x8 jRD4x8
+#define jpeg_idct_3x6 jRD3x6
+#define jpeg_idct_2x4 jRD2x4
+#define jpeg_idct_1x2 jRD1x2
#endif /* NEED_SHORT_EXTERNAL_NAMES */
/* Extern declarations for the forward and inverse DCT routines. */
-EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
-EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
-EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
+EXTERN(void) jpeg_fdct_islow
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_ifast
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_float
+ JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x7
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x6
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x5
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x4
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x3
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x2
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x1
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_9x9
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x10
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_11x11
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x12
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_13x13
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x14
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_15x15
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x16
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x8
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x7
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x6
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x5
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x4
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x3
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x2
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x1
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x16
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x14
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x12
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x10
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x8
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x6
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x4
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x2
+ JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
EXTERN(void) jpeg_idct_islow
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
@@ -107,15 +240,99 @@ EXTERN(void) jpeg_idct_ifast
EXTERN(void) jpeg_idct_float
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x7
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x6
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x5
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
EXTERN(void) jpeg_idct_4x4
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x3
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
EXTERN(void) jpeg_idct_2x2
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
EXTERN(void) jpeg_idct_1x1
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_9x9
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x10
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_11x11
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x12
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_13x13
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x14
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_15x15
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x16
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x8
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x7
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x6
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x5
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x4
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x3
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x2
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x1
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x16
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x14
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x12
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x10
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x8
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x6
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x4
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x2
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
/*
diff --git a/jddctmgr.c b/jddctmgr.c
index bbf8d0e..c0a22cf 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -98,22 +98,134 @@ start_pass (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* Select the proper IDCT routine for this component's scaling */
- switch (compptr->DCT_scaled_size) {
+ switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
#ifdef IDCT_SCALING_SUPPORTED
- case 1:
+ case ((1 << 8) + 1):
method_ptr = jpeg_idct_1x1;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
- case 2:
+ case ((2 << 8) + 2):
method_ptr = jpeg_idct_2x2;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
- case 4:
+ case ((3 << 8) + 3):
+ method_ptr = jpeg_idct_3x3;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((4 << 8) + 4):
method_ptr = jpeg_idct_4x4;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
+ case ((5 << 8) + 5):
+ method_ptr = jpeg_idct_5x5;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((6 << 8) + 6):
+ method_ptr = jpeg_idct_6x6;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((7 << 8) + 7):
+ method_ptr = jpeg_idct_7x7;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((9 << 8) + 9):
+ method_ptr = jpeg_idct_9x9;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((10 << 8) + 10):
+ method_ptr = jpeg_idct_10x10;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((11 << 8) + 11):
+ method_ptr = jpeg_idct_11x11;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((12 << 8) + 12):
+ method_ptr = jpeg_idct_12x12;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((13 << 8) + 13):
+ method_ptr = jpeg_idct_13x13;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((14 << 8) + 14):
+ method_ptr = jpeg_idct_14x14;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((15 << 8) + 15):
+ method_ptr = jpeg_idct_15x15;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((16 << 8) + 16):
+ method_ptr = jpeg_idct_16x16;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((16 << 8) + 8):
+ method_ptr = jpeg_idct_16x8;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((14 << 8) + 7):
+ method_ptr = jpeg_idct_14x7;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((12 << 8) + 6):
+ method_ptr = jpeg_idct_12x6;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((10 << 8) + 5):
+ method_ptr = jpeg_idct_10x5;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((8 << 8) + 4):
+ method_ptr = jpeg_idct_8x4;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((6 << 8) + 3):
+ method_ptr = jpeg_idct_6x3;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((4 << 8) + 2):
+ method_ptr = jpeg_idct_4x2;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((2 << 8) + 1):
+ method_ptr = jpeg_idct_2x1;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((8 << 8) + 16):
+ method_ptr = jpeg_idct_8x16;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((7 << 8) + 14):
+ method_ptr = jpeg_idct_7x14;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((6 << 8) + 12):
+ method_ptr = jpeg_idct_6x12;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((5 << 8) + 10):
+ method_ptr = jpeg_idct_5x10;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((4 << 8) + 8):
+ method_ptr = jpeg_idct_4x8;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((3 << 8) + 6):
+ method_ptr = jpeg_idct_3x6;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((2 << 8) + 4):
+ method_ptr = jpeg_idct_2x4;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
+ case ((1 << 8) + 2):
+ method_ptr = jpeg_idct_1x2;
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
+ break;
#endif
- case DCTSIZE:
+ case ((DCTSIZE << 8) + DCTSIZE):
switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
@@ -139,7 +251,8 @@ start_pass (j_decompress_ptr cinfo)
}
break;
default:
- ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
+ ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+ compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
break;
}
idct->pub.inverse_DCT[ci] = method_ptr;
diff --git a/jdhuff.c b/jdhuff.c
index b5ba39f..f721d48 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -71,13 +71,24 @@ typedef struct {
d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
/* Whether we care about the DC and AC coefficient values for each block */
- boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
- boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
+ int coef_limit[D_MAX_BLOCKS_IN_MCU];
} huff_entropy_decoder;
typedef huff_entropy_decoder * huff_entropy_ptr;
+static const int jpeg_zigzag_order[DCTSIZE2] = {
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+
+
/*
* Initialize for a Huffman-compressed scan.
*/
@@ -86,7 +97,7 @@ METHODDEF(void)
start_pass_huff_decoder (j_decompress_ptr cinfo)
{
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
- int ci, blkn, dctbl, actbl;
+ int ci, blkn, dctbl, actbl, i;
jpeg_component_info * compptr;
/* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
@@ -120,11 +131,13 @@ start_pass_huff_decoder (j_decompress_ptr cinfo)
entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
/* Decide whether we really care about the coefficient values */
if (compptr->component_needed) {
- entropy->dc_needed[blkn] = TRUE;
- /* we don't need the ACs if producing a 1/8th-size image */
- entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
+ ci = compptr->DCT_v_scaled_size;
+ if (ci <= 0 || ci > 8) ci = 8;
+ i = compptr->DCT_h_scaled_size;
+ if (i <= 0 || i > 8) i = 8;
+ entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[(ci - 1) * DCTSIZE + i - 1];
} else {
- entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
+ entropy->coef_limit[blkn] = 0;
}
}
@@ -435,26 +448,20 @@ jpeg_huff_decode (bitread_working_state * state,
/*
* Figure F.12: extend sign bit.
- * On some machines, a shift and add will be faster than a table lookup.
+ * On some machines, a shift and sub will be faster than a table lookup.
*/
#ifdef AVOID_TABLES
-#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
+#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
#else
-#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+#define HUFF_EXTEND(x,s) ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
-static const int extend_test[16] = /* entry n is 2**(n-1) */
- { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
- 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
-
-static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
- { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
- ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
- ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
- ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
+static const int bmask[16] = /* bmask[n] is mask for n rightmost bits */
+ { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
+ 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
#endif /* AVOID_TABLES */
@@ -541,39 +548,40 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
JBLOCKROW block = MCU_data[blkn];
- d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
- d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+ d_derived_tbl * htbl;
register int s, k, r;
+ int coef_limit, ci;
/* Decode a single block's worth of coefficients */
/* Section F.2.2.1: decode the DC coefficient difference */
- HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
- if (s) {
- CHECK_BIT_BUFFER(br_state, s, return FALSE);
- r = GET_BITS(s);
- s = HUFF_EXTEND(r, s);
- }
+ htbl = entropy->dc_cur_tbls[blkn];
+ HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
- if (entropy->dc_needed[blkn]) {
+ htbl = entropy->ac_cur_tbls[blkn];
+ k = 1;
+ coef_limit = entropy->coef_limit[blkn];
+ if (coef_limit) {
/* Convert DC difference to actual value, update last_dc_val */
- int ci = cinfo->MCU_membership[blkn];
+ if (s) {
+ CHECK_BIT_BUFFER(br_state, s, return FALSE);
+ r = GET_BITS(s);
+ s = HUFF_EXTEND(r, s);
+ }
+ ci = cinfo->MCU_membership[blkn];
s += state.last_dc_val[ci];
state.last_dc_val[ci] = s;
- /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+ /* Output the DC coefficient */
(*block)[0] = (JCOEF) s;
- }
-
- if (entropy->ac_needed[blkn]) {
/* Section F.2.2.2: decode the AC coefficients */
/* Since zeroes are skipped, output area must be cleared beforehand */
- for (k = 1; k < DCTSIZE2; k++) {
- HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
-
+ for (; k < coef_limit; k++) {
+ HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
r = s >> 4;
s &= 15;
-
+
if (s) {
k += r;
CHECK_BIT_BUFFER(br_state, s, return FALSE);
@@ -586,33 +594,37 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
(*block)[jpeg_natural_order[k]] = (JCOEF) s;
} else {
if (r != 15)
- break;
+ goto EndOfBlock;
k += 15;
}
}
-
} else {
-
- /* Section F.2.2.2: decode the AC coefficients */
- /* In this path we just discard the values */
- for (k = 1; k < DCTSIZE2; k++) {
- HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
-
- r = s >> 4;
- s &= 15;
-
- if (s) {
- k += r;
- CHECK_BIT_BUFFER(br_state, s, return FALSE);
- DROP_BITS(s);
- } else {
- if (r != 15)
- break;
- k += 15;
- }
+ if (s) {
+ CHECK_BIT_BUFFER(br_state, s, return FALSE);
+ DROP_BITS(s);
}
+ }
+
+ /* Section F.2.2.2: decode the AC coefficients */
+ /* In this path we just discard the values */
+ for (; k < DCTSIZE2; k++) {
+ HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+ r = s >> 4;
+ s &= 15;
+ if (s) {
+ k += r;
+ CHECK_BIT_BUFFER(br_state, s, return FALSE);
+ DROP_BITS(s);
+ } else {
+ if (r != 15)
+ break;
+ k += 15;
+ }
}
+
+ EndOfBlock: ;
}
/* Completed MCU, so update state */
diff --git a/jdinput.c b/jdinput.c
index 0c2ac8f..2e87693 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -74,12 +74,14 @@ initial_setup (j_decompress_ptr cinfo)
* In the full decompressor, this will be overridden by jdmaster.c;
* but in the transcoder, jdmaster.c is not used, so we must do it here.
*/
- cinfo->min_DCT_scaled_size = DCTSIZE;
+ cinfo->min_DCT_h_scaled_size = DCTSIZE;
+ cinfo->min_DCT_v_scaled_size = DCTSIZE;
/* Compute dimensions of components */
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- compptr->DCT_scaled_size = DCTSIZE;
+ compptr->DCT_h_scaled_size = DCTSIZE;
+ compptr->DCT_v_scaled_size = DCTSIZE;
/* Size in DCT blocks */
compptr->width_in_blocks = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
@@ -138,7 +140,7 @@ per_scan_setup (j_decompress_ptr cinfo)
compptr->MCU_width = 1;
compptr->MCU_height = 1;
compptr->MCU_blocks = 1;
- compptr->MCU_sample_width = compptr->DCT_scaled_size;
+ compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
compptr->last_col_width = 1;
/* For noninterleaved scans, it is convenient to define last_row_height
* as the number of block rows present in the last iMCU row.
@@ -174,7 +176,7 @@ per_scan_setup (j_decompress_ptr cinfo)
compptr->MCU_width = compptr->h_samp_factor;
compptr->MCU_height = compptr->v_samp_factor;
compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
- compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size;
+ compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
/* Figure number of non-dummy blocks in last MCU column & row */
tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
if (tmp == 0) tmp = compptr->MCU_width;
diff --git a/jdmainct.c b/jdmainct.c
index 13c956f..02723ca 100644
--- a/jdmainct.c
+++ b/jdmainct.c
@@ -161,7 +161,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, rgroup;
- int M = cinfo->min_DCT_scaled_size;
+ int M = cinfo->min_DCT_v_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY xbuf;
@@ -175,8 +175,8 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size; /* height of a row group of component */
+ rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
/* Get space for pointer lists --- M+4 row groups in each list.
* We alloc both pointer lists with one call to save a few cycles.
*/
@@ -202,14 +202,14 @@ make_funny_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, i, rgroup;
- int M = cinfo->min_DCT_scaled_size;
+ int M = cinfo->min_DCT_v_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY buf, xbuf0, xbuf1;
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size; /* height of a row group of component */
+ rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
xbuf0 = main->xbuffer[0][ci];
xbuf1 = main->xbuffer[1][ci];
/* First copy the workspace pointers as-is */
@@ -242,14 +242,14 @@ set_wraparound_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, i, rgroup;
- int M = cinfo->min_DCT_scaled_size;
+ int M = cinfo->min_DCT_v_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY xbuf0, xbuf1;
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size; /* height of a row group of component */
+ rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
xbuf0 = main->xbuffer[0][ci];
xbuf1 = main->xbuffer[1][ci];
for (i = 0; i < rgroup; i++) {
@@ -277,8 +277,8 @@ set_bottom_pointers (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* Count sample rows in one iMCU row and in one row group */
- iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
- rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
+ iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
+ rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
/* Count nondummy sample rows remaining for this component */
rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
if (rows_left == 0) rows_left = iMCUheight;
@@ -357,7 +357,7 @@ process_data_simple_main (j_decompress_ptr cinfo,
}
/* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
- rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size;
+ rowgroups_avail = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
/* Note: at the bottom of the image, we may pass extra garbage row groups
* to the postprocessor. The postprocessor has to check for bottom
* of image anyway (at row resolution), so no point in us doing it too.
@@ -417,7 +417,7 @@ process_data_context_main (j_decompress_ptr cinfo,
case CTX_PREPARE_FOR_IMCU:
/* Prepare to process first M-1 row groups of this iMCU row */
main->rowgroup_ctr = 0;
- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
+ main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
/* Check for bottom of image: if so, tweak pointers to "duplicate"
* the last sample row, and adjust rowgroups_avail to ignore padding rows.
*/
@@ -440,8 +440,8 @@ process_data_context_main (j_decompress_ptr cinfo,
main->buffer_full = FALSE;
/* Still need to process last row group of this iMCU row, */
/* which is saved at index M+1 of the other xbuffer */
- main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
+ main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
+ main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
main->context_state = CTX_POSTPONED_ROW;
}
}
@@ -492,21 +492,21 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
* ngroups is the number of row groups we need.
*/
if (cinfo->upsample->need_context_rows) {
- if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */
+ if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
ERREXIT(cinfo, JERR_NOTIMPL);
alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
- ngroups = cinfo->min_DCT_scaled_size + 2;
+ ngroups = cinfo->min_DCT_v_scaled_size + 2;
} else {
- ngroups = cinfo->min_DCT_scaled_size;
+ ngroups = cinfo->min_DCT_v_scaled_size;
}
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size; /* height of a row group of component */
+ rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
main->buffer[ci] = (*cinfo->mem->alloc_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE,
- compptr->width_in_blocks * compptr->DCT_scaled_size,
+ compptr->width_in_blocks * compptr->DCT_h_scaled_size,
(JDIMENSION) (rgroup * ngroups));
}
}
diff --git a/jdmaster.c b/jdmaster.c
index 2802c5b..0cdafbd 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -61,9 +61,12 @@ use_merged_upsample (j_decompress_ptr cinfo)
cinfo->comp_info[2].v_samp_factor != 1)
return FALSE;
/* furthermore, it doesn't work if we've scaled the IDCTs differently */
- if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
- cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
- cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
+ if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+ cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+ cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+ cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+ cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+ cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
return FALSE;
/* ??? also need to test for upsample-time rescaling, when & if supported */
return TRUE; /* by golly, it'll work... */
@@ -102,43 +105,150 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
jdiv_round_up((long) cinfo->image_width, 8L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 8L);
- cinfo->min_DCT_scaled_size = 1;
+ cinfo->min_DCT_h_scaled_size = 1;
+ cinfo->min_DCT_v_scaled_size = 1;
} else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
/* Provide 1/4 scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width, 4L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 4L);
- cinfo->min_DCT_scaled_size = 2;
+ cinfo->min_DCT_h_scaled_size = 2;
+ cinfo->min_DCT_v_scaled_size = 2;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 3) {
+ /* Provide 3/8 scaling */
+ cinfo->output_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 3L, 8L);
+ cinfo->output_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 3L, 8L);
+ cinfo->min_DCT_h_scaled_size = 3;
+ cinfo->min_DCT_v_scaled_size = 3;
} else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
/* Provide 1/2 scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width, 2L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 2L);
- cinfo->min_DCT_scaled_size = 4;
- } else {
+ cinfo->min_DCT_h_scaled_size = 4;
+ cinfo->min_DCT_v_scaled_size = 4;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 5) {
+ /* Provide 5/8 scaling */
+ cinfo->output_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 5L, 8L);
+ cinfo->output_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 5L, 8L);
+ cinfo->min_DCT_h_scaled_size = 5;
+ cinfo->min_DCT_v_scaled_size = 5;
+ } else if (cinfo->scale_num * 4 <= cinfo->scale_denom * 3) {
+ /* Provide 3/4 scaling */
+ cinfo->output_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 3L, 4L);
+ cinfo->output_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 3L, 4L);
+ cinfo->min_DCT_h_scaled_size = 6;
+ cinfo->min_DCT_v_scaled_size = 6;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 7) {
+ /* Provide 7/8 scaling */
+ cinfo->output_width = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 7L, 8L);
+ cinfo->output_height = (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 7L, 8L);
+ cinfo->min_DCT_h_scaled_size = 7;
+ cinfo->min_DCT_v_scaled_size = 7;
+ } else if (cinfo->scale_num <= cinfo->scale_denom) {
/* Provide 1/1 scaling */
cinfo->output_width = cinfo->image_width;
cinfo->output_height = cinfo->image_height;
- cinfo->min_DCT_scaled_size = DCTSIZE;
+ cinfo->min_DCT_h_scaled_size = DCTSIZE;
+ cinfo->min_DCT_v_scaled_size = DCTSIZE;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 9) {
+ /* Provide 9/8 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width, 8L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height, 8L);
+ cinfo->min_DCT_h_scaled_size = 9;
+ cinfo->min_DCT_v_scaled_size = 9;
+ } else if (cinfo->scale_num * 4 <= cinfo->scale_denom * 5) {
+ /* Provide 5/4 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width, 4L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height, 4L);
+ cinfo->min_DCT_h_scaled_size = 10;
+ cinfo->min_DCT_v_scaled_size = 10;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 11) {
+ /* Provide 11/8 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 3L, 8L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 3L, 8L);
+ cinfo->min_DCT_h_scaled_size = 11;
+ cinfo->min_DCT_v_scaled_size = 11;
+ } else if (cinfo->scale_num * 2 <= cinfo->scale_denom * 3) {
+ /* Provide 3/2 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width, 2L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height, 2L);
+ cinfo->min_DCT_h_scaled_size = 12;
+ cinfo->min_DCT_v_scaled_size = 12;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 13) {
+ /* Provide 13/8 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 5L, 8L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 5L, 8L);
+ cinfo->min_DCT_h_scaled_size = 13;
+ cinfo->min_DCT_v_scaled_size = 13;
+ } else if (cinfo->scale_num * 4 <= cinfo->scale_denom * 7) {
+ /* Provide 7/4 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 3L, 4L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 3L, 4L);
+ cinfo->min_DCT_h_scaled_size = 14;
+ cinfo->min_DCT_v_scaled_size = 14;
+ } else if (cinfo->scale_num * 8 <= cinfo->scale_denom * 15) {
+ /* Provide 15/8 scaling */
+ cinfo->output_width = cinfo->image_width + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_width * 7L, 8L);
+ cinfo->output_height = cinfo->image_height + (JDIMENSION)
+ jdiv_round_up((long) cinfo->image_height * 7L, 8L);
+ cinfo->min_DCT_h_scaled_size = 15;
+ cinfo->min_DCT_v_scaled_size = 15;
+ } else {
+ /* Provide 2/1 scaling */
+ cinfo->output_width = cinfo->image_width << 1;
+ cinfo->output_height = cinfo->image_height << 1;
+ cinfo->min_DCT_h_scaled_size = 16;
+ cinfo->min_DCT_v_scaled_size = 16;
}
/* In selecting the actual DCT scaling for each component, we try to
* scale up the chroma components via IDCT scaling rather than upsampling.
* This saves time if the upsampler gets to use 1:1 scaling.
- * Note this code assumes that the supported DCT scalings are powers of 2.
+ * Note that only power-of-2 factors of a subsampling ratio are adapted this way.
*/
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
- int ssize = cinfo->min_DCT_scaled_size;
- while (ssize < DCTSIZE &&
- (compptr->h_samp_factor * ssize * 2 <=
- cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
- (compptr->v_samp_factor * ssize * 2 <=
- cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
+ int ssize = 1;
+ while (cinfo->min_DCT_h_scaled_size * ssize <= DCTSIZE &&
+ (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
+ ssize = ssize * 2;
+ }
+ compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+ ssize = 1;
+ while (cinfo->min_DCT_v_scaled_size * ssize <= DCTSIZE &&
+ (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
ssize = ssize * 2;
}
- compptr->DCT_scaled_size = ssize;
+ compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+ /* We don't support h:v or v:h IDCT size ratios larger than 2, so clamp the larger one. */
+ if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+ else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+ compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
}
/* Recompute downsampled dimensions of components;
@@ -149,11 +259,11 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
/* Size in samples, after IDCT scaling */
compptr->downsampled_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width *
- (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
+ (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
(long) (cinfo->max_h_samp_factor * DCTSIZE));
compptr->downsampled_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height *
- (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
+ (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
(long) (cinfo->max_v_samp_factor * DCTSIZE));
}
@@ -373,7 +483,7 @@ master_selection (j_decompress_ptr cinfo)
jinit_inverse_dct(cinfo);
/* Entropy decoding: either Huffman or arithmetic coding. */
if (cinfo->arith_code) {
- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+ jinit_arith_decoder(cinfo);
} else {
if (cinfo->progressive_mode) {
#ifdef D_PROGRESSIVE_SUPPORTED
diff --git a/jdsample.c b/jdsample.c
index 80ffefb..fdd8579 100644
--- a/jdsample.c
+++ b/jdsample.c
@@ -237,11 +237,11 @@ h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
register JSAMPROW inptr, outptr;
register JSAMPLE invalue;
JSAMPROW outend;
- int inrow;
+ int outrow;
- for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
- inptr = input_data[inrow];
- outptr = output_data[inrow];
+ for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
+ inptr = input_data[outrow];
+ outptr = output_data[outrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
invalue = *inptr++; /* don't need GETJSAMPLE() here */
@@ -308,11 +308,11 @@ h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
register JSAMPROW inptr, outptr;
register int invalue;
register JDIMENSION colctr;
- int inrow;
+ int outrow;
- for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
- inptr = input_data[inrow];
- outptr = output_data[inrow];
+ for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
+ inptr = input_data[outrow];
+ outptr = output_data[outrow];
/* Special case for first column */
invalue = GETJSAMPLE(*inptr++);
*outptr++ = (JSAMPLE) invalue;
@@ -418,7 +418,7 @@ jinit_upsampler (j_decompress_ptr cinfo)
/* jdmainct.c doesn't support context rows when min_DCT_v_scaled_size = 1,
* so don't ask for it.
*/
- do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
+ do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_v_scaled_size > 1;
/* Verify we can handle the sampling factors, select per-component methods,
* and create storage as needed.
@@ -428,10 +428,10 @@ jinit_upsampler (j_decompress_ptr cinfo)
/* Compute size of an "input group" after IDCT scaling. This many samples
* are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
*/
- h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size;
- v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
- cinfo->min_DCT_scaled_size;
+ h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+ cinfo->min_DCT_h_scaled_size;
+ v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+ cinfo->min_DCT_v_scaled_size;
h_out_group = cinfo->max_h_samp_factor;
v_out_group = cinfo->max_v_samp_factor;
upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
diff --git a/jdtrans.c b/jdtrans.c
index 6c0ab71..7a3b268 100644
--- a/jdtrans.c
+++ b/jdtrans.c
@@ -100,9 +100,9 @@ transdecode_master_selection (j_decompress_ptr cinfo)
cinfo->buffered_image = TRUE;
/* Entropy decoding: either Huffman or arithmetic coding. */
- if (cinfo->arith_code) {
- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
- } else {
+ if (cinfo->arith_code)
+ jinit_arith_decoder(cinfo);
+ else {
if (cinfo->progressive_mode) {
#ifdef D_PROGRESSIVE_SUPPORTED
jinit_phuff_decoder(cinfo);
diff --git a/jerror.h b/jerror.h
index fc2fffe..715f898 100644
--- a/jerror.h
+++ b/jerror.h
@@ -45,8 +45,11 @@ JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
-JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
@@ -93,6 +96,7 @@ JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
@@ -170,6 +174,7 @@ JMESSAGE(JTRC_UNKNOWN_IDS,
JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
JMESSAGE(JWRN_BOGUS_PROGRESSION,
"Inconsistent progression sequence for component %d coefficient %d")
JMESSAGE(JWRN_EXTRANEOUS_DATA,
@@ -227,6 +232,15 @@ JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
(cinfo)->err->msg_parm.i[2] = (p3), \
(cinfo)->err->msg_parm.i[3] = (p4), \
(*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6) /* set msg_code, store p1..p6 in msg_parm.i[0..5], then call error_exit */ \
+ ((cinfo)->err->msg_code = (code), \
+ (cinfo)->err->msg_parm.i[0] = (p1), \
+ (cinfo)->err->msg_parm.i[1] = (p2), \
+ (cinfo)->err->msg_parm.i[2] = (p3), \
+ (cinfo)->err->msg_parm.i[3] = (p4), \
+ (cinfo)->err->msg_parm.i[4] = (p5), \
+ (cinfo)->err->msg_parm.i[5] = (p6), \
+ (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
#define ERREXITS(cinfo,code,str) \
((cinfo)->err->msg_code = (code), \
strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
diff --git a/jfdctflt.c b/jfdctflt.c
index 79d7a00..275a528 100644
--- a/jfdctflt.c
+++ b/jfdctflt.c
@@ -56,41 +56,46 @@
*/
GLOBAL(void)
-jpeg_fdct_float (FAST_FLOAT * data)
+jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
FAST_FLOAT *dataptr;
+ JSAMPROW elemptr;
int ctr;
/* Pass 1: process rows. */
dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[0] + dataptr[7];
- tmp7 = dataptr[0] - dataptr[7];
- tmp1 = dataptr[1] + dataptr[6];
- tmp6 = dataptr[1] - dataptr[6];
- tmp2 = dataptr[2] + dataptr[5];
- tmp5 = dataptr[2] - dataptr[5];
- tmp3 = dataptr[3] + dataptr[4];
- tmp4 = dataptr[3] - dataptr[4];
-
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Load data into workspace (unsigned->signed conversion is applied to dataptr[0] below) */
+ tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
+ tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
+ tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
+ tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
+ tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
+ tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
+ tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
+ tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
+
/* Even part */
-
+
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
-
- dataptr[0] = tmp10 + tmp11; /* phase 3 */
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
dataptr[4] = tmp10 - tmp11;
-
+
z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
dataptr[2] = tmp13 + z1; /* phase 5 */
dataptr[6] = tmp13 - z1;
-
+
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
@@ -126,21 +131,21 @@ jpeg_fdct_float (FAST_FLOAT * data)
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
+
/* Even part */
-
+
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
-
+
dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
+
z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
dataptr[DCTSIZE*6] = tmp13 - z1;
-
+
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
diff --git a/jfdctfst.c b/jfdctfst.c
index ccb378a..350ce35 100644
--- a/jfdctfst.c
+++ b/jfdctfst.c
@@ -111,42 +111,47 @@
*/
GLOBAL(void)
-jpeg_fdct_ifast (DCTELEM * data)
+jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
DCTELEM tmp10, tmp11, tmp12, tmp13;
DCTELEM z1, z2, z3, z4, z5, z11, z13;
DCTELEM *dataptr;
+ JSAMPROW elemptr;
int ctr;
SHIFT_TEMPS
/* Pass 1: process rows. */
dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[0] + dataptr[7];
- tmp7 = dataptr[0] - dataptr[7];
- tmp1 = dataptr[1] + dataptr[6];
- tmp6 = dataptr[1] - dataptr[6];
- tmp2 = dataptr[2] + dataptr[5];
- tmp5 = dataptr[2] - dataptr[5];
- tmp3 = dataptr[3] + dataptr[4];
- tmp4 = dataptr[3] - dataptr[4];
-
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Load data into workspace */
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+ tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+ tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+ tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+ tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
/* Even part */
-
+
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
-
- dataptr[0] = tmp10 + tmp11; /* phase 3 */
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
dataptr[4] = tmp10 - tmp11;
-
+
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
dataptr[2] = tmp13 + z1; /* phase 5 */
dataptr[6] = tmp13 - z1;
-
+
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
@@ -182,21 +187,21 @@ jpeg_fdct_ifast (DCTELEM * data)
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
+
/* Even part */
-
+
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
-
+
dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
+
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
dataptr[DCTSIZE*6] = tmp13 - z1;
-
+
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
diff --git a/jfdctint.c b/jfdctint.c
index 0a78b64..f58ee8c 100644
--- a/jfdctint.c
+++ b/jfdctint.c
@@ -2,6 +2,7 @@
* jfdctint.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modification developed 2003-2004 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -21,6 +22,23 @@
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide FDCT routines with various input sample block sizes for
+ * direct resolution reduction or enlargement and for directly resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
+ *
+ * For N<8 we fill the remaining block coefficients with zero.
+ * For N>8 we apply a partial N-point FDCT on the input samples, computing
+ * just the lower 8 frequency coefficients and discarding the rest.
+ *
+ * We must scale the output coefficients of the N-point FDCT appropriately
+ * to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling
+ * is folded into the constant multipliers (pass 2) and/or final/initial
+ * shifting.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
*/
#define JPEG_INTERNALS
@@ -137,12 +155,13 @@
*/
GLOBAL(void)
-jpeg_fdct_islow (DCTELEM * data)
+jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
- INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ INT32 tmp0, tmp1, tmp2, tmp3;
INT32 tmp10, tmp11, tmp12, tmp13;
- INT32 z1, z2, z3, z4, z5;
+ INT32 z1;
DCTELEM *dataptr;
+ JSAMPROW elemptr;
int ctr;
SHIFT_TEMPS
@@ -151,62 +170,66 @@ jpeg_fdct_islow (DCTELEM * data)
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[0] + dataptr[7];
- tmp7 = dataptr[0] - dataptr[7];
- tmp1 = dataptr[1] + dataptr[6];
- tmp6 = dataptr[1] - dataptr[6];
- tmp2 = dataptr[2] + dataptr[5];
- tmp5 = dataptr[2] - dataptr[5];
- tmp3 = dataptr[3] + dataptr[4];
- tmp4 = dataptr[3] - dataptr[4];
-
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
-
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
+ tmp12 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
+
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
CONST_BITS-PASS1_BITS);
- dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
CONST_BITS-PASS1_BITS);
-
+
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents cos(K*pi/16).
- * i0..i3 in the paper are tmp4..tmp7 here.
+ * cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
*/
-
- z1 = tmp4 + tmp7;
- z2 = tmp5 + tmp6;
- z3 = tmp4 + tmp6;
- z4 = tmp5 + tmp7;
- z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
- tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
- tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
- tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
- tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
- z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
- z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
- z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
- z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
- z3 += z5;
- z4 += z5;
-
- dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
- dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
- dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
- dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+
dataptr += DCTSIZE; /* advance pointer to next row */
}
@@ -217,67 +240,4066 @@ jpeg_fdct_islow (DCTELEM * data)
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ */
+
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
- tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
- tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
- tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
- tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
+
+ tmp10 = tmp0 + tmp3;
+ tmp12 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
+ */
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+#ifdef DCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform the forward DCT on a 7x7 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only a 7x7 region is written (rows are DCTSIZE
+ * entries apart), so every other entry stays zero.
+ * sample_data supplies the sample rows; rows 0..6 are read, 7 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3;
+ INT32 tmp10, tmp11, tmp12;
+ INT32 z1, z2, z3;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* cK represents sqrt(2) * cos(K*pi/14). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 7; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: tmp0..tmp2 are the sums of samples mirrored about the
+  * centre sample tmp3; tmp10..tmp12 are the matching differences, which
+  * are consumed by the odd part further down.
+  */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+ tmp3 = GETJSAMPLE(elemptr[3]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+ z1 = tmp0 + tmp2;
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 7 samples summed into this output.
+  */
+ dataptr[0] = (DCTELEM)
+ ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+ tmp3 += tmp3; /* tmp3 is now twice the centre sample */
+ z1 -= tmp3;
+ z1 -= tmp3; /* z1 = tmp0 + tmp2 - 4 * centre sample */
+ z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+ z1 -= z2;
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
+ dataptr[4] = (DCTELEM)
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+ /* Odd part: tmp0..tmp3 are reused as scratch from here on. */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+ tmp1 += tmp2;
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
+ tmp0 += tmp3;
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/7)**2 = 64/49, which we fold
+ * into the constant multipliers:
+ * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
+ * Only the 7 columns written by pass 1 are visited.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 7; ctr++) {
+ /* Even part: same structure as pass 1, reading down a column. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+ tmp3 = dataptr[DCTSIZE*3];
+
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+ z1 = tmp0 + tmp2;
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+ CONST_BITS+PASS1_BITS);
+ tmp3 += tmp3; /* tmp3 is now twice the row-3 value */
+ z1 -= tmp3;
+ z1 -= tmp3; /* z1 = tmp0 + tmp2 - 4 * row-3 value */
+ z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
+ z1 -= z2;
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+ tmp1 += tmp2;
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
+ tmp0 += tmp3;
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x6 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only a 6x6 region is written (rows are DCTSIZE
+ * entries apart), so every other entry stays zero.
+ * sample_data supplies the sample rows; rows 0..5 are read, 6 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2;
+ INT32 tmp10, tmp11, tmp12;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* cK represents sqrt(2) * cos(K*pi/12). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 6; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: tmp0, tmp11, tmp2 first hold the sums of mirrored sample
+  * pairs; tmp0..tmp2 are then overwritten with the matching differences
+  * for the odd part, so tmp10/tmp12 must be formed in between.
+  */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 6 samples summed into this output.
+  */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part: the c5 product is descaled up front so that it can be added
+  * to the remaining terms, which need only a shift by PASS1_BITS.
+  */
+
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
+ CONST_BITS-PASS1_BITS);
+
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/6)**2 = 16/9, which we fold
+ * into the constant multipliers:
+ * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
+ * Only the 6 columns written by pass 1 are visited.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 6; ctr++) {
+ /* Even part: same structure as pass 1, reading down a column. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part: unlike pass 1, the c5 product is kept at full precision and
+  * descaled together with the 16/9 terms.
+  */
+
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x5 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only a 5x5 region is written (rows are DCTSIZE
+ * entries apart), so every other entry stays zero.
+ * sample_data supplies the sample rows; rows 0..4 are read, 5 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2;
+ INT32 tmp10, tmp11;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We scale the results further by 2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* cK represents sqrt(2) * cos(K*pi/10). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 5; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: tmp0/tmp1 first hold the sums of mirrored sample pairs and
+  * tmp2 the centre sample; tmp0/tmp1 are then overwritten with the
+  * matching differences for the odd part.
+  */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+ tmp2 = GETJSAMPLE(elemptr[2]);
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 5 samples summed into this output.  The
+  * extra factor of 2 shows up as the +1 in the shift count (and as the
+  * -1 in the DESCALE counts below).
+  */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
+ tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
+ tmp10 -= tmp2 << 2; /* tmp10 = pair sums - 4 * centre sample */
+ tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
+ dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
+ dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+ CONST_BITS-PASS1_BITS-1);
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+ CONST_BITS-PASS1_BITS-1);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/5)**2 = 64/25, which we partially
+ * fold into the constant multipliers (other part was done in pass 1):
+ * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
+ * Only the 5 columns written by pass 1 are visited.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 5; ctr++) {
+ /* Even part: same structure as pass 1, reading down a column. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+ tmp2 = dataptr[DCTSIZE*2];
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
+ CONST_BITS+PASS1_BITS);
+ tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
+ tmp10 -= tmp2 << 2; /* tmp10 = pair sums - 4 * row-2 value */
+ tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x4 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only a 4x4 region is written (rows are DCTSIZE
+ * entries apart), so every other entry stays zero.
+ * sample_data supplies the sample rows; rows 0..3 are read, 4 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
+ /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: tmp0/tmp1 are the sums of mirrored sample pairs,
+  * tmp10/tmp11 the matching differences used by the odd part.
+  */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 4 samples summed into this output.  The
+  * 2**2 size scaling shows up as the +2 in the shift counts (and as the
+  * -2 in the DESCALE counts below).
+  */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
+
+ /* Odd part: tmp0 is reused for the shared c6 product. */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS-2);
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS-2);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * Only the 4 columns written by pass 1 are visited.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ /* Even part: same structure as pass 1, reading down a column. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp0 + tmp1, PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp0 - tmp1, PASS1_BITS);
+
+ /* Odd part */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 3x3 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only a 3x3 region is written (rows are DCTSIZE
+ * entries apart), so every other entry stays zero.
+ * sample_data supplies the sample rows; rows 0..2 are read, 3 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We scale the results further by 2**2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* cK represents sqrt(2) * cos(K*pi/6). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 3; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: tmp0 is the sum of the two outer samples, tmp1 the centre
+  * sample, tmp2 the outer difference used by the odd part.
+  */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+ tmp1 = GETJSAMPLE(elemptr[1]);
+
+ tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 3 samples summed into this output.  The
+  * extra 2**2 scaling shows up as the +2 in the shift count (and as the
+  * -2 in the DESCALE counts below).
+  */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+ CONST_BITS-PASS1_BITS-2);
+
+ /* Odd part */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
+ CONST_BITS-PASS1_BITS-2);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/3)**2 = 64/9, which we partially
+ * fold into the constant multipliers (other part was done in pass 1):
+ * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
+ * Only the 3 columns written by pass 1 are visited.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 3; ctr++) {
+ /* Even part: same structure as pass 1, reading down a column. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+ tmp1 = dataptr[DCTSIZE*1];
+
+ tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x2 sample block.
+ *
+ * data receives the coefficients.  The whole DCTSIZE2-entry block is
+ * cleared first and then only the four entries data[0], data[1],
+ * data[DCTSIZE] and data[DCTSIZE+1] are written.
+ * sample_data supplies the sample rows; rows 0 and 1 are read, 2 samples
+ * each, beginning at column start_col.
+ * No multiplications are involved: both passes are plain sums and
+ * differences, followed by one shift.
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3;
+ JSAMPROW elemptr;
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+
+ /* Row 0 */
+ elemptr = sample_data[0] + start_col;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); /* row 0 sum */
+ tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); /* row 0 difference */
+
+ /* Row 1 */
+ elemptr = sample_data[1] + start_col;
+
+ tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); /* row 1 sum */
+ tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); /* row 1 difference */
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/2)**2 = 2**4.
+ */
+
+ /* Column 0 */
+ /* Apply unsigned->signed conversion: the level shift is subtracted here
+  * only, once for each of the 4 samples summed into this output.
+  * NOTE(review): the shifted operands in this function can be negative,
+  * and ISO C leaves a left shift of a negative value undefined;
+  * presumably this relies on the compiler shifting the two's-complement
+  * bit pattern -- confirm for the supported targets.
+  */
+ data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4);
+ data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4);
+
+ /* Column 1 */
+ data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4);
+ data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4);
+}
+
+
+/*
+ * Perform the forward DCT on a 1x1 sample block.
+ *
+ * Reads the single sample sample_data[0][start_col].  The complete output
+ * block (DCTSIZE2 elements) is cleared and only data[0] is then set.
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* We leave the result scaled up by an overall factor of 8. */
+ /* We must also scale the output by (8/1)**2 = 2**6. */
+ /* The level-shifted sample is negative for inputs below CENTERJSAMPLE, */
+ /* and left-shifting a negative signed value is undefined behavior in C, */
+ /* so the scaling is done by multiplying with 64 instead of "<< 6". */
+ /* Apply unsigned->signed conversion */
+ data[0] = (DCTELEM)
+ ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) * 64);
+}
+
+
+/*
+ * Perform the forward DCT on a 9x9 sample block.
+ *
+ * Reads 9 samples, starting at start_col, from each of sample_data[0]
+ * through sample_data[8].  Only an 8x8 block of coefficients is produced
+ * in data[] (row stride DCTSIZE): pass 1 stores outputs 0..7 of each row
+ * transform and pass 2 stores outputs 0..7 of each column transform.
+ */
+
+GLOBAL(void)
+jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ INT32 tmp10, tmp11, tmp12, tmp13;
+ INT32 z1, z2;
+ DCTELEM workspace[8]; /* pass 1 output for sample row 8 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* we scale the results further by 2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* cK represents sqrt(2) * cos(K*pi/18). */
+ /* Rows 0..7 are written to data[], row 8 to workspace[]. */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
+ tmp4 = GETJSAMPLE(elemptr[4]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
+
+ z1 = tmp0 + tmp2 + tmp3;
+ z2 = tmp1 + tmp4;
+ /* Apply unsigned->signed conversion */
+ /* The level-shifted sum can be negative; multiply by 2 rather than */
+ /* use "<< 1", since left-shifting a negative value is undefined in C. */
+ dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) * 2);
+ dataptr[6] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
+ CONST_BITS-1);
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
+ z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
+ + z1 + z2, CONST_BITS-1);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
+ + z1 - z2, CONST_BITS-1);
+
+ /* Odd part */
+
+ dataptr[3] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
+ CONST_BITS-1);
+
+ tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
+ tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
+ tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
+
+ tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
+
+ dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
+ dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 9)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/9)**2 = 64/81, which we partially
+ * fold into the constant multipliers and final/initial shifting:
+ * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
+ * Each of the 8 columns combines rows 0..7 from data[] with row 8
+ * from workspace[].
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0]; /* rows 0 and 8 */
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
+ tmp4 = dataptr[DCTSIZE*4];
+
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
+ tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
+
+ z1 = tmp0 + tmp2 + tmp3;
+ z2 = tmp1 + tmp4;
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */
+ CONST_BITS+2);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */
+ CONST_BITS+2);
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */
+ z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */
+ + z1 + z2, CONST_BITS+2);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */
+ + z1 - z2, CONST_BITS+2);
+
+ /* Odd part */
+
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
+ CONST_BITS+2);
+
+ tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */
+ tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
+ tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
+
+ tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
+
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
+ dataptr[DCTSIZE*7] = (DCTELEM)
+ DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x10 sample block.
+ *
+ * Reads 10 samples, starting at start_col, from each of sample_data[0]
+ * through sample_data[9].  Only an 8x8 block of coefficients is produced
+ * in data[] (row stride DCTSIZE): pass 1 stores outputs 0..7 of each row
+ * transform and pass 2 stores outputs 0..7 of each column transform.
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ DCTELEM workspace[8*2]; /* pass 1 output for sample rows 8 and 9 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* we scale the results further by 2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* cK represents sqrt(2) * cos(K*pi/20). */
+ /* Rows 0..7 are written to data[], rows 8..9 to workspace[]. */
+ /* Scaling of values that may be negative is done by multiplication, */
+ /* because left-shifting a negative signed value is undefined in C. */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+ tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+ tmp10 = tmp0 + tmp4;
+ tmp13 = tmp0 - tmp4;
+ tmp11 = tmp1 + tmp3;
+ tmp14 = tmp1 - tmp3;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) * 2);
+ tmp12 += tmp12;
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+ MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
+ CONST_BITS-1);
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
+ CONST_BITS-1);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
+ CONST_BITS-1);
+
+ /* Odd part */
+
+ tmp10 = tmp0 + tmp4;
+ tmp11 = tmp1 - tmp3;
+ dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) * 2);
+ /* Bring tmp2 to CONST_BITS fixed point (multiplier 1.0 == c5). */
+ tmp2 *= ((INT32) 1 << CONST_BITS);
+ dataptr[1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
+ MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
+ MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
+ MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
+ CONST_BITS-1);
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
+ MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
+ (tmp11 * ((INT32) 1 << (CONST_BITS - 1))) - tmp2; /* tmp11 * 0.5 */
+ dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
+ dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 10)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/10)**2 = 16/25, which we partially
+ * fold into the constant multipliers and final/initial shifting:
+ * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
+ * Each of the 8 columns combines rows 0..7 from data[] with rows 8..9
+ * from workspace[].
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; /* rows 0 and 9 */
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; /* rows 1 and 8 */
+ tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+ tmp10 = tmp0 + tmp4;
+ tmp13 = tmp0 - tmp4;
+ tmp11 = tmp1 + tmp3;
+ tmp14 = tmp1 - tmp3;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+ CONST_BITS+2);
+ tmp12 += tmp12;
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+ MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
+ CONST_BITS+2);
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
+ CONST_BITS+2);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
+ CONST_BITS+2);
+
+ /* Odd part */
+
+ tmp10 = tmp0 + tmp4;
+ tmp11 = tmp1 - tmp3;
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
+ CONST_BITS+2);
+ tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
+ MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
+ MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
+ MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
+ CONST_BITS+2);
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
+ MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
+ MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on an 11x11 sample block.
+ *
+ * Reads 11 samples, starting at start_col, from each of sample_data[0]
+ * through sample_data[10].  Only an 8x8 block of coefficients is produced
+ * in data[] (row stride DCTSIZE): pass 1 stores outputs 0..7 of each row
+ * transform and pass 2 stores outputs 0..7 of each column transform.
+ */
+
+GLOBAL(void)
+jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 z1, z2, z3;
+ DCTELEM workspace[8*3]; /* pass 1 output for sample rows 8..10 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* we scale the results further by 2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* cK represents sqrt(2) * cos(K*pi/22). */
+ /* Rows 0..7 are written to data[], rows 8..10 to workspace[]. */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
+ tmp5 = GETJSAMPLE(elemptr[5]); /* unpaired middle sample */
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
+
+ /* Apply unsigned->signed conversion */
+ /* The level-shifted sum can be negative; multiply by 2 rather than */
+ /* use "<< 1", since left-shifting a negative value is undefined in C. */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) * 2);
+ tmp5 += tmp5;
+ tmp0 -= tmp5;
+ tmp1 -= tmp5;
+ tmp2 -= tmp5;
+ tmp3 -= tmp5;
+ tmp4 -= tmp5;
+ z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
+ MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
+ z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
+ z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
+ dataptr[2] = (DCTELEM)
+ DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
+ - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
+ CONST_BITS-1);
+ dataptr[4] = (DCTELEM)
+ DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
+ - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
+ + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
+ CONST_BITS-1);
+ dataptr[6] = (DCTELEM)
+ DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
+ - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */
+ CONST_BITS-1);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */
+ tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
+ + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */
+ tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */
+ tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
+ - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */
+ tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */
+ tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
+ + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */
+ tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
+ - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 11)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/11)**2 = 64/121, which we partially
+ * fold into the constant multipliers and final/initial shifting:
+ * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
+ * Each of the 8 columns combines rows 0..7 from data[] with rows 8..10
+ * from workspace[].
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2]; /* rows 0 and 10 */
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1]; /* rows 1 and 9 */
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0]; /* rows 2 and 8 */
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
+ tmp5 = dataptr[DCTSIZE*5];
+
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
+ tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
+ tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
+ FIX(1.057851240)), /* 128/121 */
+ CONST_BITS+2);
+ tmp5 += tmp5;
+ tmp0 -= tmp5;
+ tmp1 -= tmp5;
+ tmp2 -= tmp5;
+ tmp3 -= tmp5;
+ tmp4 -= tmp5;
+ z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */
+ MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */
+ z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */
+ z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
+ - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */
+ CONST_BITS+2);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
+ - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */
+ + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */
+ CONST_BITS+2);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
+ - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */
+ CONST_BITS+2);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */
+ tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
+ + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */
+ tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */
+ tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
+ - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */
+ tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */
+ tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
+ + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */
+ tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
+ - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x12 sample block.
+ *
+ * Reads 12 samples, starting at start_col, from each of sample_data[0]
+ * through sample_data[11].  Only an 8x8 block of coefficients is produced
+ * in data[] (row stride DCTSIZE): pass 1 stores outputs 0..7 of each row
+ * transform and pass 2 stores outputs 0..7 of each column transform.
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ DCTELEM workspace[8*4]; /* pass 1 output for sample rows 8..11 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* cK represents sqrt(2) * cos(K*pi/24).
+ * Rows 0..7 are written to data[]; once ctr reaches DCTSIZE the output
+ * pointer switches to workspace[] for rows 8..11.
+ */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+ tmp10 = tmp0 + tmp5;
+ tmp13 = tmp0 - tmp5;
+ tmp11 = tmp1 + tmp4;
+ tmp14 = tmp1 - tmp4;
+ tmp12 = tmp2 + tmp3;
+ tmp15 = tmp2 - tmp3;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
+ dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); /* c6 == 1, no multiply */
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+ CONST_BITS);
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+ CONST_BITS);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+ + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+ + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+ - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+ - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 12)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/12)**2 = 4/9, which we partially
+ * fold into the constant multipliers and final shifting:
+ * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
+ * Each of the 8 columns combines rows 0..7 from data[] with rows 8..11
+ * from workspace[].
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; /* rows 0 and 11 */
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; /* rows 1 and 10 */
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; /* rows 2 and 9 */
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; /* rows 3 and 8 */
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+ tmp10 = tmp0 + tmp5;
+ tmp13 = tmp0 - tmp5;
+ tmp11 = tmp1 + tmp4;
+ tmp14 = tmp1 - tmp4;
+ tmp12 = tmp2 + tmp3;
+ tmp15 = tmp2 - tmp3;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+ tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+ CONST_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+ CONST_BITS+1);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
+ CONST_BITS+1);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
+ MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
+ CONST_BITS+1);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+ + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+ + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+ - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+ - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 13x13 sample block.
+ *
+ * Reads 13 samples, starting at start_col, from each of sample_data[0]
+ * through sample_data[12].  Only an 8x8 block of coefficients is produced
+ * in data[] (row stride DCTSIZE): pass 1 stores outputs 0..7 of each row
+ * transform and pass 2 stores outputs 0..7 of each column transform.
+ */
+
+GLOBAL(void)
+jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ INT32 z1, z2;
+ DCTELEM workspace[8*5]; /* pass 1 output for sample rows 8..12 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* cK represents sqrt(2) * cos(K*pi/26).
+ * Rows 0..7 are written to data[]; once ctr reaches DCTSIZE the output
+ * pointer switches to workspace[] for rows 8..12.
+ */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
+ tmp6 = GETJSAMPLE(elemptr[6]); /* unpaired middle sample */
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
+ tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
+ tmp6 += tmp6; /* twice the middle sample is subtracted from each pair sum */
+ tmp0 -= tmp6;
+ tmp1 -= tmp6;
+ tmp2 -= tmp6;
+ tmp3 -= tmp6;
+ tmp4 -= tmp6;
+ tmp5 -= tmp6;
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
+ MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
+ MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
+ MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
+ MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
+ MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
+ CONST_BITS);
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
+ MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
+ MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
+ z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
+ MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
+ MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */
+
+ dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
+ dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */
+ MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */
+ MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */
+ tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */
+ MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
+ tmp1 += tmp4 + tmp5 +
+ MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */
+ MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */
+ tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
+ tmp2 += tmp4 + tmp6 -
+ MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */
+ MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */
+ tmp3 += tmp5 + tmp6 +
+ MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */
+ MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 13)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/13)**2 = 64/169, which we partially
+ * fold into the constant multipliers and final shifting:
+ * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
+ * Each of the 8 columns combines rows 0..7 from data[] with rows 8..12
+ * from workspace[].
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4]; /* rows 0 and 12 */
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3]; /* rows 1 and 11 */
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2]; /* rows 2 and 10 */
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1]; /* rows 3 and 9 */
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0]; /* rows 4 and 8 */
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
+ tmp6 = dataptr[DCTSIZE*6];
+
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
+ tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
+ tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
+ tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
+ FIX(0.757396450)), /* 128/169 */
+ CONST_BITS+1);
+ tmp6 += tmp6;
+ tmp0 -= tmp6;
+ tmp1 -= tmp6;
+ tmp2 -= tmp6;
+ tmp3 -= tmp6;
+ tmp4 -= tmp6;
+ tmp5 -= tmp6;
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */
+ MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */
+ MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */
+ MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */
+ MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */
+ MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */
+ CONST_BITS+1);
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
+ MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
+ MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */
+ z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
+ MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
+ MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */
+
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */
+ MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */
+ MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */
+ tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */
+ MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
+ tmp1 += tmp4 + tmp5 +
+ MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */
+ MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */
+ tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
+ tmp2 += tmp4 + tmp6 -
+ MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */
+ MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */
+ tmp3 += tmp5 + tmp6 +
+ MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */
+ MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x14 sample block.
+ *
+ * Reads 14 rows from sample_data, 14 samples per row beginning at column
+ * start_col.  Only output coefficients 0..7 are computed in each
+ * dimension; they are stored in data with a row stride of DCTSIZE.
+ * Pass 1 results for input rows DCTSIZE..13 are kept in a local workspace
+ * until pass 2 combines them with the rows already stored in data.
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ DCTELEM workspace[8*6]; /* pass 1 results for the last 6 input rows */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* cK represents sqrt(2) * cos(K*pi/28). */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sums of mirrored sample pairs */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+ tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+ tmp10 = tmp0 + tmp6;
+ tmp14 = tmp0 - tmp6;
+ tmp11 = tmp1 + tmp5;
+ tmp15 = tmp1 - tmp5;
+ tmp12 = tmp2 + tmp4;
+ tmp16 = tmp2 - tmp4;
+
+ /* Differences of the same pairs; consumed by the odd part below. */
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
+ tmp13 += tmp13;
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+ MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+ MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
+ CONST_BITS);
+
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
+
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
+ + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
+ CONST_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
+ - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
+ CONST_BITS);
+
+ /* Odd part */
+
+ tmp10 = tmp1 + tmp2;
+ tmp11 = tmp5 - tmp4;
+ dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
+ tmp3 <<= CONST_BITS; /* c7 = sqrt(2)*cos(pi/4) = 1: only the fixed-point scale is needed */
+ tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
+ tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
+ tmp10 += tmp11 - tmp3;
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
+ MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
+ dataptr[5] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+ + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
+ CONST_BITS);
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
+ MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+ - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
+ CONST_BITS);
+ /* Output 1 needs tmp6 weighted by c13.  tmp11 and tmp12 already carry
+ * c9*tmp6 and -c11*tmp6, so subtract (c9-c11-c13)*tmp6 as a properly
+ * scaled fixed-point product, in the same form as pass 2 below.
+ */
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp11 + tmp12 + tmp3
+ - MULTIPLY(tmp0, FIX(1.126980169)) /* c3+c5-c1 */
+ - MULTIPLY(tmp6, FIX(0.126980169)), /* c9-c11-c13 */
+ CONST_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 14)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/14)**2 = 16/49, which we partially
+ * fold into the constant multipliers and final shifting:
+ * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: column rows 0..7 come from data, the remaining rows
+ * from the workspace (workspace row k holds input row DCTSIZE+k).
+ */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+ tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+ tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+ tmp10 = tmp0 + tmp6;
+ tmp14 = tmp0 - tmp6;
+ tmp11 = tmp1 + tmp5;
+ tmp15 = tmp1 - tmp5;
+ tmp12 = tmp2 + tmp4;
+ tmp16 = tmp2 - tmp4;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+ tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+ FIX(0.653061224)), /* 32/49 */
+ CONST_BITS+1);
+ tmp13 += tmp13;
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+ MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+ MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
+ CONST_BITS+1);
+
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
+
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
+ + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
+ CONST_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
+ - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
+ CONST_BITS+1);
+
+ /* Odd part */
+
+ tmp10 = tmp1 + tmp2;
+ tmp11 = tmp5 - tmp4;
+ dataptr[DCTSIZE*7] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+ FIX(0.653061224)), /* 32/49 */
+ CONST_BITS+1);
+ tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
+ tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
+ tmp10 += tmp11 - tmp3;
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
+ MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+ + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
+ CONST_BITS+1);
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
+ MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+ - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
+ CONST_BITS+1);
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp11 + tmp12 + tmp3
+ - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
+ - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
+ CONST_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 15x15 sample block.
+ *
+ * Reads 15 rows from sample_data, 15 samples per row beginning at column
+ * start_col.  Only output coefficients 0..7 are computed in each
+ * dimension; they are stored in data with a row stride of DCTSIZE.
+ * Pass 1 results for input rows DCTSIZE..14 are kept in a local workspace
+ * until pass 2 combines them with the rows already stored in data.
+ */
+
+GLOBAL(void)
+jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 z1, z2, z3;
+ DCTELEM workspace[8*7]; /* pass 1 results for the last 7 input rows */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* cK represents sqrt(2) * cos(K*pi/30). */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sums of mirrored sample pairs */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
+ tmp7 = GETJSAMPLE(elemptr[7]); /* center sample has no mirror partner */
+
+ /* Differences of the same pairs; consumed by the odd part below. */
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
+ tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
+ tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
+
+ z1 = tmp0 + tmp4 + tmp5;
+ z2 = tmp1 + tmp3 + tmp6;
+ z3 = tmp2 + tmp7;
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
+ z3 += z3;
+ dataptr[6] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
+ MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
+ CONST_BITS);
+ tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; /* tmp2 is reused as the common subtrahend below */
+ z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
+ MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
+ z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
+ MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
+ z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
+ MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
+ MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
+
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
+ dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
+
+ /* Odd part */
+
+ tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+ FIX(1.224744871)); /* c5 */
+ tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
+ MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */
+ tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */
+ tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */
+ MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */
+ MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */
+ tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */
+ MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */
+ MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */
+ tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */
+ MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */
+ MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 15)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/15)**2 = 64/225, which we partially
+ * fold into the constant multipliers and final shifting:
+ * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: column rows 0..7 come from data, the remaining rows
+ * from the workspace (workspace row k holds input row DCTSIZE+k).
+ */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
+ tmp7 = dataptr[DCTSIZE*7]; /* center row has no mirror partner */
+
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
+ tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
+ tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
+ tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
+ tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
+
+ z1 = tmp0 + tmp4 + tmp5;
+ z2 = tmp1 + tmp3 + tmp6;
+ z3 = tmp2 + tmp7;
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
+ CONST_BITS+2);
+ z3 += z3;
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
+ MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */
+ CONST_BITS+2);
+ tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; /* tmp2 is reused as the common subtrahend below */
+ z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */
+ MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */
+ z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */
+ MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */
+ z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */
+ MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */
+ MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */
+
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
+
+ /* Odd part */
+
+ tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+ FIX(1.393487498)); /* c5 */
+ tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
+ MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */
+ tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */
+ tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */
+ MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */
+ MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */
+ tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */
+ MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */
+ MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */
+ tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */
+ MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */
+ MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x16 sample block.
+ *
+ * Reads 2*DCTSIZE rows from sample_data, 16 samples per row beginning at
+ * column start_col.  Only output coefficients 0..7 are computed in each
+ * dimension; they are stored in data with a row stride of DCTSIZE.
+ * Pass 1 results for the second DCTSIZE input rows are kept in a local
+ * workspace until pass 2 combines them with the rows stored in data.
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+ DCTELEM workspace[DCTSIZE2]; /* pass 1 results for input rows DCTSIZE..2*DCTSIZE-1 */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* cK represents sqrt(2) * cos(K*pi/32). */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sums of mirrored sample pairs */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+ tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+ tmp10 = tmp0 + tmp7;
+ tmp14 = tmp0 - tmp7;
+ tmp11 = tmp1 + tmp6;
+ tmp15 = tmp1 - tmp6;
+ tmp12 = tmp2 + tmp5;
+ tmp16 = tmp2 - tmp5;
+ tmp13 = tmp3 + tmp4;
+ tmp17 = tmp3 - tmp4;
+
+ /* Differences of the same pairs; consumed by the odd part below. */
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+ tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
+ CONST_BITS-PASS1_BITS);
+
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == DCTSIZE * 2)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/16)**2 = 1/2**2.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: column rows 0..7 come from data, the remaining rows
+ * from the workspace (workspace row k holds input row DCTSIZE+k).
+ */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+ tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+ tmp10 = tmp0 + tmp7;
+ tmp14 = tmp0 - tmp7;
+ tmp11 = tmp1 + tmp6;
+ tmp15 = tmp1 - tmp6;
+ tmp12 = tmp2 + tmp5;
+ tmp16 = tmp2 - tmp5;
+ tmp13 = tmp3 + tmp4;
+ tmp17 = tmp3 - tmp4;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+ tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+ tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
+ CONST_BITS+PASS1_BITS+2);
+
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
+ CONST_BITS+PASS1_BITS+2);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
+ CONST_BITS+PASS1_BITS+2);
+
+ /* Odd part */
+
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x8 sample block.
+ *
+ * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ *
+ * Reads DCTSIZE rows from sample_data, 16 samples per row beginning at
+ * column start_col.  Pass 1 keeps only output coefficients 0..7 of each
+ * row, so both passes work directly in data and no workspace is needed.
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+ INT32 z1;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
+
+ dataptr = data;
+ ctr = 0; /* redundant: the for statement below initializes ctr again */
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sums of mirrored sample pairs */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+ tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+ tmp10 = tmp0 + tmp7;
+ tmp14 = tmp0 - tmp7;
+ tmp11 = tmp1 + tmp6;
+ tmp15 = tmp1 - tmp6;
+ tmp12 = tmp2 + tmp5;
+ tmp16 = tmp2 - tmp5;
+ tmp13 = tmp3 + tmp4;
+ tmp17 = tmp3 - tmp4;
+
+ /* Differences of the same pairs; consumed by the odd part below. */
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+ tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
+ CONST_BITS-PASS1_BITS);
+
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by 8/16 = 1/2.
+ * The extra 1/2 is the "+1" in every DESCALE shift count below.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
-
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+ tmp10 = tmp0 + tmp3;
+ tmp12 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
+ CONST_BITS+PASS1_BITS+1);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
+ */
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
+ CONST_BITS+PASS1_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x7 sample block.
+ *
+ * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
+ *
+ * Reads 7 rows from sample_data, 14 samples per row beginning at column
+ * start_col.  Pass 1 keeps only output coefficients 0..7 of each row, and
+ * pass 2 produces 7 coefficients per column, so row 7 of data is never
+ * computed and is zeroed up front.
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 z1, z2, z3;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Zero bottom row of output coefficient block. */
+ MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 7; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sums of mirrored sample pairs */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+ tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+ tmp10 = tmp0 + tmp6;
+ tmp14 = tmp0 - tmp6;
+ tmp11 = tmp1 + tmp5;
+ tmp15 = tmp1 - tmp5;
+ tmp12 = tmp2 + tmp4;
+ tmp16 = tmp2 - tmp4;
+
+ /* Differences of the same pairs; consumed by the odd part below. */
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
+ tmp13 += tmp13;
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+ MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+ MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
+ CONST_BITS-PASS1_BITS);
+
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
+
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
+ + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
+ - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = tmp1 + tmp2;
+ tmp11 = tmp5 - tmp4;
+ dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
+ tmp3 <<= CONST_BITS; /* c7 = sqrt(2)*cos(pi/4) = 1: only the fixed-point scale is needed */
+ tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
+ tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
+ tmp10 += tmp11 - tmp3;
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
+ MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
+ dataptr[5] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+ + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
+ CONST_BITS-PASS1_BITS);
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
+ MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+ - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
+ CONST_BITS-PASS1_BITS);
+ /* Output 1 needs tmp6 weighted by c13.  tmp11 and tmp12 already carry
+ * c9*tmp6 and -c11*tmp6, so subtract (c9-c11-c13)*tmp6 as a properly
+ * scaled fixed-point product; adding the unscaled tmp6 would not
+ * contribute the intended unit weight.
+ */
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp11 + tmp12 + tmp3
+ - MULTIPLY(tmp0, FIX(1.126980169)) /* c3+c5-c1 */
+ - MULTIPLY(tmp6, FIX(0.126980169)), /* c9-c11-c13 */
+ CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/14)*(8/7) = 32/49, which we
+ * partially fold into the constant multipliers and final shifting:
+ * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+ tmp3 = dataptr[DCTSIZE*3]; /* center row has no mirror partner */
+
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+ z1 = tmp0 + tmp2;
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+ CONST_BITS+PASS1_BITS+1);
+ tmp3 += tmp3;
+ z1 -= tmp3;
+ z1 -= tmp3;
+ z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
+ z1 -= z2;
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+ tmp1 += tmp2;
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
+ tmp0 += tmp3;
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x6 sample block.
+ *
+ * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; also used as the working
+ *                 buffer between the two passes.  Rows 0..5 receive the
+ *                 results, the 2 rows starting at data[DCTSIZE*6] are
+ *                 zeroed.
+ *   sample_data - array of input sample rows; rows 0..5 are read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+11 of every row are read.
+ *
+ * Each row transform stores only coefficients 0..7 of the 12-point DCT.
+ *
+ * NOTE(review): several terms are scaled with "<<" applied to values that
+ * can be negative (differences of samples).  ISO C does not define the
+ * result of left-shifting a negative signed value -- confirm that INT32
+ * and the supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Zero 2 bottom rows of output coefficient block. */
+ MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 6; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sample k is paired with sample 11-k. */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+ tmp10 = tmp0 + tmp5;
+ tmp13 = tmp0 - tmp5;
+ tmp11 = tmp1 + tmp4;
+ tmp14 = tmp1 - tmp4;
+ tmp12 = tmp2 + tmp3;
+ tmp15 = tmp2 - tmp3;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); /* differences: used by the odd part */
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (12 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part: tmp0..tmp5 hold the sample differences; tmp10..tmp15 are
+ * reused as scratch from here on.
+ */
+
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+ + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+ + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+ - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+ - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/12)*(8/6) = 8/9, which we
+ * partially fold into the constant multipliers and final shifting:
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+ * The remaining factor of 1/2 is the extra "+1" in every descale shift
+ * below.  All DCTSIZE columns are processed, counting ctr downwards.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: row k is paired with row 5-k. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; /* differences: used by the odd part */
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+ CONST_BITS+PASS1_BITS+1);
+
+ /* Odd part: tmp10 is reused for the shared c5 product. */
+
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x5 sample block.
+ *
+ * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; also used as the working
+ *                 buffer between the two passes.  Rows 0..4 receive the
+ *                 results, the 3 rows starting at data[DCTSIZE*5] are
+ *                 zeroed.
+ *   sample_data - array of input sample rows; rows 0..4 are read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+9 of every row are read.
+ *
+ * Each row transform stores only coefficients 0..7 of the 10-point DCT.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative (for
+ * example tmp10 - tmp11 - tmp2).  ISO C does not define the result of
+ * left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Zero 3 bottom rows of output coefficient block. */
+ MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 5; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sample k is paired with sample 9-k. */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+ tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); /* middle pair, stored directly in tmp12 */
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+ tmp10 = tmp0 + tmp4;
+ tmp13 = tmp0 - tmp4;
+ tmp11 = tmp1 + tmp3;
+ tmp14 = tmp1 - tmp3;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); /* differences: used by the odd part */
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (10 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
+ tmp12 += tmp12; /* tmp12 is now twice the middle pair sum */
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+ MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
+ CONST_BITS-PASS1_BITS);
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
+ dataptr[2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part: tmp0..tmp4 hold the sample differences; tmp10..tmp13 are
+ * reused as scratch from here on.
+ */
+
+ tmp10 = tmp0 + tmp4;
+ tmp11 = tmp1 - tmp3;
+ dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
+ tmp2 <<= CONST_BITS; /* presumably matches the scale of the MULTIPLY products it is added to below -- confirm against FIX() */
+ dataptr[1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
+ MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
+ MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
+ MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
+ CONST_BITS-PASS1_BITS);
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
+ MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
+ (tmp11 << (CONST_BITS - 1)) - tmp2;
+ dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/10)*(8/5) = 32/25, which we
+ * fold into the constant multipliers:
+ * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
+ * All DCTSIZE columns are processed, counting ctr downwards.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: row k is paired with row 4-k; row 2 is the centre. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+ tmp2 = dataptr[DCTSIZE*2];
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; /* differences: used by the odd part */
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
+ CONST_BITS+PASS1_BITS);
+ tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
+ tmp10 -= tmp2 << 2; /* sum of the four outer rows minus 4 * centre row */
+ tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+ /* Odd part: tmp10 is reused for the shared c3 product. */
+
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on an 8x4 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; also used as the working
+ *                 buffer between the two passes.  Rows 0..3 receive the
+ *                 results, the 4 rows starting at data[DCTSIZE*4] are
+ *                 zeroed.
+ *   sample_data - array of input sample rows; rows 0..3 are read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+7 of every row are read.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative (for
+ * example tmp10 - tmp11).  ISO C does not define the result of
+ * left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3;
+ INT32 tmp10, tmp11, tmp12, tmp13;
+ INT32 z1;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Zero 4 bottom rows of output coefficient block. */
+ MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We must also scale the output by 8/4 = 2, which we add here */
+ /* (one extra bit in every shift count of this pass). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * Sample k is paired with sample 7-k.
+ */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+ tmp10 = tmp0 + tmp3;
+ tmp12 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); /* differences: used by the odd part */
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (8 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+ CONST_BITS-PASS1_BITS-1);
+ dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
+ CONST_BITS-PASS1_BITS-1);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
+ * tmp10..tmp13 and z1 are reused as scratch from here on.
+ */
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * 4-point FDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+ * All DCTSIZE columns are processed, counting ctr downwards.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: row k is paired with row 3-k. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp0 + tmp1, PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp0 - tmp1, PASS1_BITS);
+
+ /* Odd part: tmp0 is reused for the shared c6 product. */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x3 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; also used as the working
+ *                 buffer between the two passes.  The whole block
+ *                 (DCTSIZE2 elements) is zeroed first; results are then
+ *                 written to columns 0..5 of rows 0..2.
+ *   sample_data - array of input sample rows; rows 0..2 are read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+5 of every row are read.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative (for
+ * example tmp0 - tmp1 - tmp2).  ISO C does not define the result of
+ * left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2;
+ INT32 tmp10, tmp11, tmp12;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We scale the results further by 2 as part of output adaptation */
+ /* scaling for different DCT size. */
+ /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 3; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sample k is paired with sample 5-k. */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); /* differences: used by the odd part */
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (6 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
+ CONST_BITS-PASS1_BITS-1);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+ CONST_BITS-PASS1_BITS-1);
+
+ /* Odd part: the shared c5 product is descaled first, so it can be
+ * added to the plainly shifted terms below.
+ */
+
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
+ CONST_BITS-PASS1_BITS-1);
+
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+ * fold into the constant multipliers (other part was done in pass 1):
+ * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
+ * Only the 6 columns written by pass 1 are processed.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 6; ctr++) {
+ /* Even part: rows 0 and 2 are paired; row 1 is the centre. */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+ tmp1 = dataptr[DCTSIZE*1];
+
+ tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x2 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; also used as the working
+ *                 buffer between the two passes.  The whole block
+ *                 (DCTSIZE2 elements) is zeroed first; results are then
+ *                 written to columns 0..3 of rows 0..1.
+ *   sample_data - array of input sample rows; rows 0..1 are read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+3 of every row are read.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative (for
+ * example tmp0 - tmp1).  ISO C does not define the result of
+ * left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here */
+ /* (three extra bits in every shift count of this pass). */
+ /* 4-point FDCT kernel, */
+ /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 2; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part: sample k is paired with sample 3-k. */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); /* differences: used by the odd part */
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (4 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
+
+ /* Odd part: tmp0 is reused for the shared c6 product. */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS-3);
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS-3);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * The 2-point column transform is a plain sum and difference of the
+ * two rows; only the 4 columns written by pass 1 are processed.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0];
+ tmp1 = dataptr[DCTSIZE*1];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp0 + tmp1, PASS1_BITS);
+
+ /* Odd part */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 - tmp1, PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x1 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block.  The whole block (DCTSIZE2
+ *                 elements) is zeroed first; only data[0] and data[1]
+ *                 are then written.
+ *   sample_data - array of input sample rows; only row 0 is read.
+ *   start_col   - offset added to the row pointer; samples start_col and
+ *                 start_col+1 are read.
+ *
+ * No loops or multiplications are needed: the result is the sum and the
+ * difference of the two samples, each shifted left by 5.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative
+ * (tmp0 - tmp1, and the centred sum).  ISO C does not define the result
+ * of left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1;
+ JSAMPROW elemptr;
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ elemptr = sample_data[0] + start_col;
+
+ tmp0 = GETJSAMPLE(elemptr[0]);
+ tmp1 = GETJSAMPLE(elemptr[1]);
+
+ /* We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/2)*(8/1) = 2**5.
+ */
+
+ /* Even part */
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once */
+ /* per input sample (2 of them), and only in the DC term. */
+ data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
+
+ /* Odd part */
+ data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
+}
+
+
+/*
+ * Perform the forward DCT on an 8x16 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
+ *
+ * Parameters:
+ *   data        - output coefficient block; receives the pass 1 results
+ *                 for input rows 0..DCTSIZE-1 and then the final
+ *                 coefficients.
+ *   sample_data - array of input sample rows; rows 0..2*DCTSIZE-1 are
+ *                 read.
+ *   start_col   - offset added to each row pointer; samples
+ *                 start_col .. start_col+7 of every row are read.
+ *
+ * The pass 1 results for input rows DCTSIZE..2*DCTSIZE-1 do not fit in
+ * data and are kept in a local workspace of DCTSIZE2 elements.  Pass 2
+ * reads both buffers and stores coefficients 0..7 of each 16-point
+ * column transform back into data.
+ *
+ * NOTE(review): "<<" is applied to values that can be negative (for
+ * example tmp10 - tmp11).  ISO C does not define the result of
+ * left-shifting a negative signed value -- confirm that INT32 and the
+ * supported compilers make this safe.
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+ INT32 z1;
+ DCTELEM workspace[DCTSIZE2]; /* pass 1 output for the second half of the input rows */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* The loop is open-ended; its exit test is at the bottom of the body. */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ * Sample k is paired with sample 7-k.
+ */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+ tmp10 = tmp0 + tmp3;
+ tmp12 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); /* differences: used by the odd part */
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once
+ * per input sample (8 of them), and only in the DC term.
+ */
+ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
+ * tmp10..tmp13 and z1 are reused as scratch from here on.
+ */
+
 tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
 tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
+ dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+
+ ctr++; /* ctr is now the number of rows completed */
+
+ if (ctr != DCTSIZE) {
+ if (ctr == DCTSIZE * 2)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by 8/16 = 1/2.
+ * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+ * The factor of 1/2 is the extra "+1" in every descale shift below.
+ * All DCTSIZE columns are processed, counting ctr downwards.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ /* Even part: row k of data is paired with row 7-k of the workspace,
+ * i.e. the mirror-image row of the 16-row column.
+ */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+ tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+ tmp10 = tmp0 + tmp7;
+ tmp14 = tmp0 - tmp7;
+ tmp11 = tmp1 + tmp6;
+ tmp15 = tmp1 - tmp6;
+ tmp12 = tmp2 + tmp5;
+ tmp16 = tmp2 - tmp5;
+ tmp13 = tmp3 + tmp4;
+ tmp17 = tmp3 - tmp4;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; /* differences: used by the odd part */
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+ tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+ tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
+ CONST_BITS+PASS1_BITS+1);
+
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
+ CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
+ CONST_BITS+PASS1_BITS+1);
+
+ /* Odd part: tmp0..tmp7 hold the row differences; tmp10..tmp16 are
+ * reused as scratch from here on.
+ */
+
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 7x14 sample block.
+ *
+ * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 z1, z2, z3;
+ DCTELEM workspace[8*6];
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+ tmp3 = GETJSAMPLE(elemptr[3]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+ z1 = tmp0 + tmp2;
+ /* Apply unsigned->signed conversion */
+ dataptr[0] = (DCTELEM)
+ ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+ tmp3 += tmp3;
+ z1 -= tmp3;
+ z1 -= tmp3;
+ z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+ z1 -= z2;
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
+ dataptr[4] = (DCTELEM)
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+ tmp1 += tmp2;
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
+ tmp0 += tmp3;
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
+
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 14)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/7)*(8/14) = 32/49, which we
+ * fold into the constant multipliers:
+ * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 7; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+ tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+ tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+ tmp10 = tmp0 + tmp6;
+ tmp14 = tmp0 - tmp6;
+ tmp11 = tmp1 + tmp5;
+ tmp15 = tmp1 - tmp5;
+ tmp12 = tmp2 + tmp4;
+ tmp16 = tmp2 - tmp4;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+ tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+ FIX(0.653061224)), /* 32/49 */
+ CONST_BITS+PASS1_BITS);
+ tmp13 += tmp13;
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+ MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+ MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
+ CONST_BITS+PASS1_BITS);
+
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
+
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
+ + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
+ - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = tmp1 + tmp2;
+ tmp11 = tmp5 - tmp4;
+ dataptr[DCTSIZE*7] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+ FIX(0.653061224)), /* 32/49 */
+ CONST_BITS+PASS1_BITS);
+ tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
+ tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
+ tmp10 += tmp11 - tmp3;
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
+ MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+ + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
+ CONST_BITS+PASS1_BITS);
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
+ MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+ - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp11 + tmp12 + tmp3
+ - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
+ - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x12 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then columns 0..5 of rows 0..7 are filled with coefficients.
+ * sample_data supplies the input rows; rows 0..11 are read, 6 samples each,
+ * beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ DCTELEM workspace[8*4]; /* pass 1 results for rows from DCTSIZE onward */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ * The first DCTSIZE rows are written to data; once ctr reaches DCTSIZE
+ * the remaining rows (up to row 11) are written to workspace.
+ */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+ /* Apply unsigned->signed conversion: only the DC term needs the explicit
+ * 6 * CENTERJSAMPLE subtraction, because a constant added to all six
+ * samples cancels in every other output of this pass.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
+ CONST_BITS-PASS1_BITS);
+
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 12)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/6)*(8/12) = 8/9, which we
+ * fold into the constant multipliers:
+ * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
+ * Each column has 12 inputs: rows 0..7 are read from data and rows 8..11
+ * from workspace.  Only output rows 0..7 are stored, overwriting data.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+ tmp10 = tmp0 + tmp5;
+ tmp13 = tmp0 - tmp5;
+ tmp11 = tmp1 + tmp4;
+ tmp14 = tmp1 - tmp4;
+ tmp12 = tmp2 + tmp3;
+ tmp15 = tmp2 - tmp3;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+ tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
+ MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+ + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+ + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+ - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+ - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x10 sample block.
+ *
+ * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then columns 0..4 of rows 0..7 are filled with coefficients.
+ * sample_data supplies the input rows; rows 0..9 are read, 5 samples each,
+ * beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ DCTELEM workspace[8*2]; /* pass 1 results for rows from DCTSIZE onward */
+ DCTELEM *dataptr;
+ DCTELEM *wsptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+ * The first DCTSIZE rows are written to data; once ctr reaches DCTSIZE
+ * the remaining rows (up to row 9) are written to workspace.
+ */
+
+ dataptr = data;
+ ctr = 0;
+ for (;;) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+ tmp2 = GETJSAMPLE(elemptr[2]);
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+ /* Apply unsigned->signed conversion: only the DC term needs the explicit
+ * 5 * CENTERJSAMPLE subtraction, because a constant added to all five
+ * samples cancels in every other output of this pass.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
+ tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
+ tmp10 -= tmp2 << 2; /* four outer samples minus 4 * middle sample */
+ tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
+ dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
+ dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+ CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+ CONST_BITS-PASS1_BITS);
+
+ ctr++;
+
+ if (ctr != DCTSIZE) {
+ if (ctr == 10)
+ break; /* Done. */
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ } else
+ dataptr = workspace; /* switch pointer to extended workspace */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/5)*(8/10) = 32/25, which we
+ * fold into the constant multipliers:
+ * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
+ * Each column has 10 inputs: rows 0..7 are read from data and rows 8..9
+ * from workspace.  Only output rows 0..7 are stored, overwriting data.
+ */
+
+ dataptr = data;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 5; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
+ tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+ tmp10 = tmp0 + tmp4;
+ tmp13 = tmp0 - tmp4;
+ tmp11 = tmp1 + tmp3;
+ tmp14 = tmp1 - tmp3;
+
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+ CONST_BITS+PASS1_BITS);
+ tmp12 += tmp12; /* doubled for use in the row 4 output below */
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+ MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
+ CONST_BITS+PASS1_BITS);
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*6] = (DCTELEM)
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = tmp0 + tmp4;
+ tmp11 = tmp1 - tmp3;
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
+ CONST_BITS+PASS1_BITS);
+ tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
+ MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
+ MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
+ MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
+ CONST_BITS+PASS1_BITS);
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
+ MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
+ MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ wsptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x8 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then columns 0..3 of rows 0..7 are filled with coefficients.
+ * sample_data supplies the input rows; DCTSIZE rows are read, 4 samples
+ * each, beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3;
+ INT32 tmp10, tmp11, tmp12, tmp13;
+ INT32 z1;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We must also scale the output by 8/4 = 2, which we add here. */
+ /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+ /* Apply unsigned->signed conversion: only the DC term needs the explicit
+ * 4 * CENTERJSAMPLE subtraction, because a constant added to all four
+ * samples cancels in every other output of this pass.
+ * The extra factor of 2 appears as the +1 in the shift counts here and
+ * as the -1 in the DESCALE counts of the odd part.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
+
+ /* Odd part */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+ CONST_BITS-PASS1_BITS-1);
+ dataptr[3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+ CONST_BITS-PASS1_BITS-1);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * Only the 4 columns written by pass 1 are processed; each column is
+ * transformed in place within data.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+ tmp10 = tmp0 + tmp3;
+ tmp12 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp13 = tmp1 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
+
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
-
+
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents cos(K*pi/16).
- * i0..i3 in the paper are tmp4..tmp7 here.
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+ * i0..i3 in the paper are tmp0..tmp3 here.
*/
-
- z1 = tmp4 + tmp7;
- z2 = tmp5 + tmp6;
- z3 = tmp4 + tmp6;
- z4 = tmp5 + tmp7;
- z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
- tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
- tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
- tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
- tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
- z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
- z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
- z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
- z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
- z3 += z5;
- z4 += z5;
-
- dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp1 + tmp3;
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
+
+ tmp12 += z1;
+ tmp13 += z1;
+
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
CONST_BITS+PASS1_BITS);
-
+
dataptr++; /* advance pointer to next column */
}
}
+
+/*
+ * Perform the forward DCT on a 3x6 sample block.
+ *
+ * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then columns 0..2 of rows 0..5 are filled with coefficients.
+ * sample_data supplies the input rows; rows 0..5 are read, 3 samples each,
+ * beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1, tmp2;
+ INT32 tmp10, tmp11, tmp12;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+ /* We scale the results further by 2 as part of output adaption */
+ /* scaling for different DCT size. */
+ /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 6; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+ tmp1 = GETJSAMPLE(elemptr[1]);
+
+ tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+ /* Apply unsigned->signed conversion: only the DC term needs the explicit
+ * 3 * CENTERJSAMPLE subtraction, because a constant added to all three
+ * samples cancels in every other output of this pass.
+ * The extra factor of 2 appears as the +1 in the shift count here and
+ * as the -1 in the DESCALE counts below.
+ */
+ dataptr[0] = (DCTELEM)
+ ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
+ dataptr[2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+ CONST_BITS-PASS1_BITS-1);
+
+ /* Odd part */
+
+ dataptr[1] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
+ CONST_BITS-PASS1_BITS-1);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+ * fold into the constant multipliers (other part was done in pass 1):
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+ * Only the 3 columns written by pass 1 are processed; each column is
+ * transformed in place within rows 0..5 of data.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 3; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+ tmp10 = tmp0 + tmp2;
+ tmp12 = tmp0 - tmp2;
+
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+ dataptr[DCTSIZE*0] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*2] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*4] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+ CONST_BITS+PASS1_BITS);
+
+ /* Odd part */
+
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+ dataptr[DCTSIZE*5] = (DCTELEM)
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
+ CONST_BITS+PASS1_BITS);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x4 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then columns 0..1 of rows 0..3 are filled with coefficients.
+ * sample_data supplies the input rows; rows 0..3 are read, 2 samples each,
+ * beginning at column start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11;
+ DCTELEM *dataptr;
+ JSAMPROW elemptr;
+ int ctr;
+ SHIFT_TEMPS
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+ /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 4; ctr++) {
+ elemptr = sample_data[ctr] + start_col;
+
+ /* Even part */
+
+ tmp0 = GETJSAMPLE(elemptr[0]);
+ tmp1 = GETJSAMPLE(elemptr[1]);
+
+ /* Apply unsigned->signed conversion: only the sum needs the explicit
+ * 2 * CENTERJSAMPLE subtraction; a constant added to both samples
+ * cancels in the difference computed for the odd part.
+ */
+ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
+
+ /* Odd part */
+
+ dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+
+ /* Pass 2: process columns.
+ * We leave the results scaled up by an overall factor of 8.
+ * 4-point FDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+ * Only the 2 columns written by pass 1 are processed; each column is
+ * transformed in place within rows 0..3 of data.
+ */
+
+ dataptr = data;
+ for (ctr = 0; ctr < 2; ctr++) {
+ /* Even part */
+
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+ dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
+ dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
+
+ /* Odd part */
+
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
+
+ dataptr[DCTSIZE*1] = (DCTELEM)
+ DESCALE(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), CONST_BITS); /* c2-c6 */
+ dataptr[DCTSIZE*3] = (DCTELEM)
+ DESCALE(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), CONST_BITS); /* c2+c6 */
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+
+/*
+ * Perform the forward DCT on a 1x2 sample block.
+ *
+ * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ *
+ * data receives the result: the whole block of DCTSIZE2 elements is zeroed
+ * first, then column 0 of rows 0 and 1 is filled.
+ * One sample is read from each of rows 0 and 1 of sample_data, at column
+ * start_col.
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+ INT32 tmp0, tmp1;
+
+ /* Pre-zero output coefficient block. */
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+ tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+ tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
+ /* We leave the results scaled up by an overall factor of 8.
+ * We must also scale the output by (8/1)*(8/2) = 2**5.
+ */
+
+ /* Even part */
+ /* Apply unsigned->signed conversion: only the sum needs the explicit
+ * 2 * CENTERJSAMPLE subtraction; a constant added to both samples
+ * cancels in the difference computed for the odd part.
+ */
+ data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
+
+ /* Odd part */
+ data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
+}
+
+#endif /* DCT_SCALING_SUPPORTED */
#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/jidctint.c b/jidctint.c
index a72b320..5582fd4 100644
--- a/jidctint.c
+++ b/jidctint.c
@@ -2,6 +2,7 @@
* jidctint.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2004 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -23,6 +24,28 @@
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement and for directly resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients
+ * as the lower frequencies and assuming the higher frequencies to be zero.
+ * It turns out that the computational effort is similar to the 8x8 IDCT
+ * regarding the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
*/
#define JPEG_INTERNALS
@@ -386,4 +409,4655 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
}
}
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ *
+ * Only rows 0..6 and columns 0..6 of coef_block are read; each coefficient
+ * is combined with the matching entry of compptr->dct_table by DEQUANTIZE.
+ * Rows 0..6 of output_buf receive 7 samples each, starting at column
+ * output_col; every sample is looked up in the table obtained from
+ * IDCT_range_limit(cinfo), indexed by the descaled result & RANGE_MASK.
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+ INT32 z1, z2, z3;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[7*7]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * The input is read with row stride DCTSIZE, the work array is written
+ * with row stride 7; one column is handled per iteration.
+ */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp13 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
+ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+ tmp0 = z1 + z3;
+ z2 -= tmp0;
+ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
+ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
+ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
+ tmp1 += tmp2;
+ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
+ tmp0 += z2;
+ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
+
+ /* Final output stage: outputs k and 6-k are the sum and difference of
+ * the matching even and odd terms; the middle output 3 is tmp13 alone.
+ */
+
+ wsptr[7*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[7*6] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[7*1] = (int) DESCALE(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[7*5] = (int) DESCALE(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[7*2] = (int) DESCALE(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[7*4] = (int) DESCALE(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[7*3] = (int) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 7 rows from work array, store into output array.
+ * The same 7-point kernel as in pass 1 is applied, now to the 7
+ * consecutive work array entries of each row.
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 7; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp13 = (INT32) wsptr[0];
+ tmp13 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[4];
+ z3 = (INT32) wsptr[6];
+
+ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
+ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+ tmp0 = z1 + z3;
+ z2 -= tmp0;
+ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
+ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
+ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
+ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
+ tmp0 = tmp1 - tmp2;
+ tmp1 += tmp2;
+ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
+ tmp1 += tmp2;
+ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
+ tmp0 += z2;
+ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
+
+ /* Final output stage: each descaled value is masked with RANGE_MASK
+ * and used as an index into range_limit to obtain the output sample.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp10 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp11 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp12 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 7; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table via IDCT_range_limit().
+ *   compptr     component info; its dct_table is read as an array of
+ *               ISLOW_MULT_TYPE dequantization multipliers.
+ *   coef_block  input coefficients, addressed with a row stride of DCTSIZE;
+ *               only rows 0..5 of columns 0..5 are read.
+ *   output_buf  array of output row pointers; rows 0..5 are written.
+ *   output_col  offset within each output row at which the six samples
+ *               of that row are stored.
+ *
+ * The transform is carried out as two 1-D passes (columns, then rows)
+ * with a 6x6 int workspace holding the intermediate values.
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+ INT32 z1, z2, z3;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[6*6]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Each iteration handles one input column and writes one workspace */
+ /* column (workspace row stride is 6). */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
+ tmp1 = tmp0 + tmp10;
+ /* tmp11 is descaled right away because it is later combined with an */
+ /* odd term (tmp1 below) that carries only PASS1_BITS of scaling. */
+ tmp11 = DESCALE(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
+ tmp10 = tmp1 + tmp0;
+ tmp12 = tmp1 - tmp0;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+ /* With cK as defined above, c3 == 1, so this term needs no multiply; */
+ /* it is shifted by PASS1_BITS only, matching the scale of tmp11. */
+ tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+ /* Final output stage */
+ /* Rows 1 and 4 use the already-descaled tmp11/tmp1 pair and are stored */
+ /* directly; the other rows are reduced from CONST_BITS scaling here. */
+
+ wsptr[6*0] = DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[6*5] = DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[6*1] = (int) (tmp11 + tmp1);
+ wsptr[6*4] = (int) (tmp11 - tmp1);
+ wsptr[6*2] = DESCALE(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[6*3] = DESCALE(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 6 rows from work array, store into output array. */
+ /* Each iteration consumes six consecutive workspace entries and emits */
+ /* six samples into output row ctr, starting at output_col. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+ tmp2 = (INT32) wsptr[4];
+ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
+ tmp1 = tmp0 + tmp10;
+ tmp11 = tmp0 - tmp10 - tmp10;
+ tmp10 = (INT32) wsptr[2];
+ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
+ tmp10 = tmp1 + tmp0;
+ tmp12 = tmp1 - tmp0;
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+ /* The product above uses the unshifted inputs; from here on z1..z3 */
+ /* carry CONST_BITS of scaling so they can be added to it directly. */
+ z1 <<= CONST_BITS;
+ z2 <<= CONST_BITS;
+ z3 <<= CONST_BITS;
+ tmp0 = tmp1 + z1 + z2;
+ tmp2 = tmp1 - z2 + z3;
+ tmp1 = z1 - z2 - z3;
+
+ /* Final output stage */
+ /* The shift count removes this pass's CONST_BITS, the PASS1_BITS left */
+ /* over from pass 1, and 3 further bits; the masked result indexes the */
+ /* range-limit table to obtain the output sample. */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp10 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp11 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp12 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 6; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Dequantize one coefficient block and inverse-transform it into a
+ * reduced-size 5x5 output block.
+ *
+ * The 1-D kernel needs 5 multiplications.
+ * cK stands for sqrt(2) * cos(K*pi/10).
+ *
+ * Only rows 0..4 of columns 0..4 of coef_block are read.  Output rows
+ * 0..4 of output_buf receive five samples each, starting at output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 dc, in2, in4;                  /* even-indexed inputs */
+  INT32 in1, in3;                      /* odd-indexed inputs */
+  INT32 sum_term, diff_term, base, rot;
+  INT32 even_outer, even_inner, even_mid;
+  INT32 odd_outer, odd_inner;
+  ISLOW_MULT_TYPE * qtab = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit_tab = IDCT_range_limit(cinfo);
+  JSAMPROW dest;
+  int * line;
+  int col, row;
+  int workspace[5*5];                  /* column-pass results */
+  SHIFT_TEMPS
+
+  /* Column pass: one input column in, one workspace column out. */
+
+  for (col = 0; col < 5; col++) {
+    /* Even half */
+    dc = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtab[DCTSIZE*0 + col]);
+    dc <<= CONST_BITS;
+    in2 = DEQUANTIZE(coef_block[DCTSIZE*2 + col], qtab[DCTSIZE*2 + col]);
+    in4 = DEQUANTIZE(coef_block[DCTSIZE*4 + col], qtab[DCTSIZE*4 + col]);
+    sum_term = MULTIPLY(in2 + in4, FIX(0.790569415));   /* (c2+c4)/2 */
+    diff_term = MULTIPLY(in2 - in4, FIX(0.353553391));  /* (c2-c4)/2 */
+    base = dc + diff_term;
+    even_outer = base + sum_term;
+    even_inner = base - sum_term;
+    even_mid = dc;
+    even_mid -= diff_term << 2;
+
+    /* Odd half */
+    in1 = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtab[DCTSIZE*1 + col]);
+    in3 = DEQUANTIZE(coef_block[DCTSIZE*3 + col], qtab[DCTSIZE*3 + col]);
+    rot = MULTIPLY(in1 + in3, FIX(0.831253876));        /* c3 */
+    odd_outer = rot + MULTIPLY(in1, FIX(0.513743148));  /* c1-c3 */
+    odd_inner = rot - MULTIPLY(in3, FIX(2.176250899));  /* c1+c3 */
+
+    /* Combine halves; keep PASS1_BITS of extra precision. */
+    workspace[5*0 + col] =
+      (int) DESCALE(even_outer + odd_outer, CONST_BITS-PASS1_BITS);
+    workspace[5*1 + col] =
+      (int) DESCALE(even_inner + odd_inner, CONST_BITS-PASS1_BITS);
+    workspace[5*2 + col] =
+      (int) DESCALE(even_mid, CONST_BITS-PASS1_BITS);
+    workspace[5*3 + col] =
+      (int) DESCALE(even_inner - odd_inner, CONST_BITS-PASS1_BITS);
+    workspace[5*4 + col] =
+      (int) DESCALE(even_outer - odd_outer, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Row pass: one workspace row in, one row of output samples out. */
+
+  for (row = 0; row < 5; row++) {
+    line = workspace + 5*row;
+    dest = output_buf[row] + output_col;
+
+    /* Even half */
+    dc = (INT32) line[0];
+    dc <<= CONST_BITS;
+    in2 = (INT32) line[2];
+    in4 = (INT32) line[4];
+    sum_term = MULTIPLY(in2 + in4, FIX(0.790569415));   /* (c2+c4)/2 */
+    diff_term = MULTIPLY(in2 - in4, FIX(0.353553391));  /* (c2-c4)/2 */
+    base = dc + diff_term;
+    even_outer = base + sum_term;
+    even_inner = base - sum_term;
+    even_mid = dc;
+    even_mid -= diff_term << 2;
+
+    /* Odd half */
+    in1 = (INT32) line[1];
+    in3 = (INT32) line[3];
+    rot = MULTIPLY(in1 + in3, FIX(0.831253876));        /* c3 */
+    odd_outer = rot + MULTIPLY(in1, FIX(0.513743148));  /* c1-c3 */
+    odd_inner = rot - MULTIPLY(in3, FIX(2.176250899));  /* c1+c3 */
+
+    /* Descale and map each result through the range-limit table. */
+    dest[0] = limit_tab[(int) DESCALE(even_outer + odd_outer,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[1] = limit_tab[(int) DESCALE(even_inner + odd_inner,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[2] = limit_tab[(int) DESCALE(even_mid,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[3] = limit_tab[(int) DESCALE(even_inner - odd_inner,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[4] = limit_tab[(int) DESCALE(even_outer - odd_outer,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and inverse-transform it into a
+ * reduced-size 4x4 output block.
+ *
+ * The 1-D kernel needs 3 multiplications.
+ * cK stands for sqrt(2) * cos(K*pi/16) [i.e. the 8-point IDCT constants].
+ *
+ * Only rows 0..3 of columns 0..3 of coef_block are read.  Output rows
+ * 0..3 of output_buf receive four samples each, starting at output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 in0, in1, in2, in3;
+  INT32 even_sum, even_diff;
+  INT32 rot, odd_inner, odd_outer;
+  ISLOW_MULT_TYPE * qtab = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit_tab = IDCT_range_limit(cinfo);
+  JSAMPROW dest;
+  int * line;
+  int col, row;
+  int workspace[4*4];                  /* column-pass results */
+  SHIFT_TEMPS
+
+  /* Column pass: one input column in, one workspace column out. */
+
+  for (col = 0; col < 4; col++) {
+    /* Even half: plain butterfly, scaled up by PASS1_BITS. */
+    in0 = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtab[DCTSIZE*0 + col]);
+    in2 = DEQUANTIZE(coef_block[DCTSIZE*2 + col], qtab[DCTSIZE*2 + col]);
+    even_sum = (in0 + in2) << PASS1_BITS;
+    even_diff = (in0 - in2) << PASS1_BITS;
+
+    /* Odd half: the rotation used by the even part of the 8x8 LL&M IDCT. */
+    in1 = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtab[DCTSIZE*1 + col]);
+    in3 = DEQUANTIZE(coef_block[DCTSIZE*3 + col], qtab[DCTSIZE*3 + col]);
+    rot = MULTIPLY(in1 + in3, FIX_0_541196100);         /* c6 */
+    odd_inner = DESCALE(rot + MULTIPLY(in3, - FIX_1_847759065), /* -(c2+c6) */
+                        CONST_BITS-PASS1_BITS);
+    odd_outer = DESCALE(rot + MULTIPLY(in1, FIX_0_765366865),   /* c2-c6 */
+                        CONST_BITS-PASS1_BITS);
+
+    /* Both halves now share the PASS1_BITS scale; combine directly. */
+    workspace[4*0 + col] = (int) (even_sum + odd_outer);
+    workspace[4*1 + col] = (int) (even_diff + odd_inner);
+    workspace[4*2 + col] = (int) (even_diff - odd_inner);
+    workspace[4*3 + col] = (int) (even_sum - odd_outer);
+  }
+
+  /* Row pass: one workspace row in, one row of output samples out. */
+
+  for (row = 0; row < 4; row++) {
+    line = workspace + 4*row;
+    dest = output_buf[row] + output_col;
+
+    /* Even half, scaled up to match the multiplied odd half. */
+    in0 = (INT32) line[0];
+    in2 = (INT32) line[2];
+    even_sum = (in0 + in2) << CONST_BITS;
+    even_diff = (in0 - in2) << CONST_BITS;
+
+    /* Odd half: the rotation used by the even part of the 8x8 LL&M IDCT. */
+    in1 = (INT32) line[1];
+    in3 = (INT32) line[3];
+    rot = MULTIPLY(in1 + in3, FIX_0_541196100);         /* c6 */
+    odd_inner = rot + MULTIPLY(in3, - FIX_1_847759065); /* -(c2+c6) */
+    odd_outer = rot + MULTIPLY(in1, FIX_0_765366865);   /* c2-c6 */
+
+    /* Descale and map each result through the range-limit table. */
+    dest[0] = limit_tab[(int) DESCALE(even_sum + odd_outer,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[1] = limit_tab[(int) DESCALE(even_diff + odd_inner,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[2] = limit_tab[(int) DESCALE(even_diff - odd_inner,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[3] = limit_tab[(int) DESCALE(even_sum - odd_outer,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and inverse-transform it into a
+ * reduced-size 3x3 output block.
+ *
+ * The 1-D kernel needs 2 multiplications.
+ * cK stands for sqrt(2) * cos(K*pi/6).
+ *
+ * Only rows 0..2 of columns 0..2 of coef_block are read.  Output rows
+ * 0..2 of output_buf receive three samples each, starting at output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 dc, in1, in2;
+  INT32 even_outer, even_mid, odd;
+  ISLOW_MULT_TYPE * qtab = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit_tab = IDCT_range_limit(cinfo);
+  JSAMPROW dest;
+  int * line;
+  int col, row;
+  int workspace[3*3];                  /* column-pass results */
+  SHIFT_TEMPS
+
+  /* Column pass: one input column in, one workspace column out. */
+
+  for (col = 0; col < 3; col++) {
+    /* Even half */
+    dc = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtab[DCTSIZE*0 + col]);
+    dc <<= CONST_BITS;
+    in2 = DEQUANTIZE(coef_block[DCTSIZE*2 + col], qtab[DCTSIZE*2 + col]);
+    in2 = MULTIPLY(in2, FIX(0.707106781));              /* c2 */
+    even_outer = dc + in2;
+    even_mid = dc - in2 - in2;
+
+    /* Odd half */
+    in1 = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtab[DCTSIZE*1 + col]);
+    odd = MULTIPLY(in1, FIX(1.224744871));              /* c1 */
+
+    /* Combine halves; keep PASS1_BITS of extra precision. */
+    workspace[3*0 + col] =
+      (int) DESCALE(even_outer + odd, CONST_BITS-PASS1_BITS);
+    workspace[3*1 + col] =
+      (int) DESCALE(even_mid, CONST_BITS-PASS1_BITS);
+    workspace[3*2 + col] =
+      (int) DESCALE(even_outer - odd, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Row pass: one workspace row in, one row of output samples out. */
+
+  for (row = 0; row < 3; row++) {
+    line = workspace + 3*row;
+    dest = output_buf[row] + output_col;
+
+    /* Even half */
+    dc = (INT32) line[0];
+    dc <<= CONST_BITS;
+    in2 = (INT32) line[2];
+    in2 = MULTIPLY(in2, FIX(0.707106781));              /* c2 */
+    even_outer = dc + in2;
+    even_mid = dc - in2 - in2;
+
+    /* Odd half */
+    in1 = (INT32) line[1];
+    odd = MULTIPLY(in1, FIX(1.224744871));              /* c1 */
+
+    /* Descale and map each result through the range-limit table. */
+    dest[0] = limit_tab[(int) DESCALE(even_outer + odd,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[1] = limit_tab[(int) DESCALE(even_mid,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+    dest[2] = limit_tab[(int) DESCALE(even_outer - odd,
+                                      CONST_BITS+PASS1_BITS+3)
+                        & RANGE_MASK];
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and inverse-transform it into a
+ * reduced-size 2x2 output block.
+ *
+ * Only the four coefficients in rows 0..1 of columns 0..1 are read, and
+ * the transform reduces to sums and differences: apart from the
+ * dequantization itself no multiplications are performed.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 upper, lower;                  /* dequantized pair of one column */
+  INT32 left_sum, left_diff;           /* vertical results, column 0 */
+  INT32 right_sum, right_diff;         /* vertical results, column 1 */
+  ISLOW_MULT_TYPE * qtab = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit_tab = IDCT_range_limit(cinfo);
+  JSAMPROW dest;
+  SHIFT_TEMPS
+
+  /* Vertical step on column 0. */
+  upper = DEQUANTIZE(coef_block[DCTSIZE*0], qtab[DCTSIZE*0]);
+  lower = DEQUANTIZE(coef_block[DCTSIZE*1], qtab[DCTSIZE*1]);
+  left_sum = upper + lower;
+  left_diff = upper - lower;
+
+  /* Vertical step on column 1. */
+  upper = DEQUANTIZE(coef_block[DCTSIZE*0+1], qtab[DCTSIZE*0+1]);
+  lower = DEQUANTIZE(coef_block[DCTSIZE*1+1], qtab[DCTSIZE*1+1]);
+  right_sum = upper + lower;
+  right_diff = upper - lower;
+
+  /* Horizontal step for the top output row. */
+  dest = output_buf[0] + output_col;
+  dest[0] = limit_tab[(int) DESCALE(left_sum + right_sum, 3) & RANGE_MASK];
+  dest[1] = limit_tab[(int) DESCALE(left_sum - right_sum, 3) & RANGE_MASK];
+
+  /* Horizontal step for the bottom output row. */
+  dest = output_buf[1] + output_col;
+  dest[0] = limit_tab[(int) DESCALE(left_diff + right_diff, 3) & RANGE_MASK];
+  dest[1] = limit_tab[(int) DESCALE(left_diff - right_diff, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Dequantize one coefficient block and reduce it to a single output
+ * sample (reduced-size 1x1 result).
+ *
+ * No real transform is needed: the one output sample is the block's
+ * average value, which is the dequantized DC coefficient divided by 8.
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  ISLOW_MULT_TYPE * dc_quant = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit_tab = IDCT_range_limit(cinfo);
+  int dc_term;
+  SHIFT_TEMPS
+
+  /* Dequantize the DC coefficient, then descale it by 3 bits (divide by 8). */
+  dc_term = DEQUANTIZE(coef_block[0], dc_quant[0]);
+  dc_term = (int) DESCALE((INT32) dc_term, 3);
+
+  /* Map through the range-limit table and store the lone sample. */
+  output_buf[0][output_col] = limit_tab[dc_term & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table via IDCT_range_limit().
+ *   compptr     component info; its dct_table is read as an array of
+ *               ISLOW_MULT_TYPE dequantization multipliers.
+ *   coef_block  input coefficients, addressed with a row stride of DCTSIZE;
+ *               rows 0..7 of columns 0..7 are read.
+ *   output_buf  array of output row pointers; rows 0..8 are written.
+ *   output_col  offset within each output row at which the nine samples
+ *               of that row are stored.
+ *
+ * Pass 1 expands each of the 8 input columns to 9 values, so the
+ * workspace holds 9 rows of 8 entries; pass 2 expands each of those
+ * 9 rows to 9 output samples.
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*9]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Workspace row stride is 8 (one entry per input column). */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
+ tmp1 = tmp0 + tmp3;
+ tmp2 = tmp0 - tmp3 - tmp3;
+
+ /* tmp0 is reused from here on as a scratch product. */
+ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+ tmp11 = tmp2 + tmp0;
+ tmp14 = tmp2 - tmp0 - tmp0;
+
+ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
+ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
+
+ tmp10 = tmp1 + tmp0 - tmp3;
+ tmp12 = tmp1 - tmp0 + tmp2;
+ tmp13 = tmp1 - tmp2 + tmp3;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ /* z2 now holds the already-multiplied (and negated) row-3 term. */
+ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
+
+ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
+ tmp0 = tmp2 + tmp3 - z2;
+ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
+ tmp2 += z2 - tmp1;
+ tmp3 += z2 + tmp1;
+ /* tmp1 is overwritten only after tmp2 and tmp3 have consumed it. */
+ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+ /* Final output stage */
+ /* Even term k pairs with odd term k to give rows k and 8-k; */
+ /* the middle row (4) comes from the even part alone. */
+
+ wsptr[8*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp14, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 9 rows from work array, store into output array. */
+ /* Same 1-D kernel as pass 1, reading 8 workspace entries per row. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 9; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[4];
+ z3 = (INT32) wsptr[6];
+
+ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
+ tmp1 = tmp0 + tmp3;
+ tmp2 = tmp0 - tmp3 - tmp3;
+
+ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+ tmp11 = tmp2 + tmp0;
+ tmp14 = tmp2 - tmp0 - tmp0;
+
+ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
+ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
+
+ tmp10 = tmp1 + tmp0 - tmp3;
+ tmp12 = tmp1 - tmp0 + tmp2;
+ tmp13 = tmp1 - tmp2 + tmp3;
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
+
+ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
+ tmp0 = tmp2 + tmp3 - z2;
+ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
+ tmp2 += z2 - tmp1;
+ tmp3 += z2 + tmp1;
+ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+ /* Final output stage */
+ /* The shift count removes this pass's CONST_BITS, the PASS1_BITS left */
+ /* over from pass 1, and 3 further bits; the masked result indexes the */
+ /* range-limit table to obtain the output sample. */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp10 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp11 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp12 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp13 - tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table via IDCT_range_limit().
+ *   compptr     component info; its dct_table is read as an array of
+ *               ISLOW_MULT_TYPE dequantization multipliers.
+ *   coef_block  input coefficients, addressed with a row stride of DCTSIZE;
+ *               rows 0..7 of columns 0..7 are read.
+ *   output_buf  array of output row pointers; rows 0..9 are written.
+ *   output_col  offset within each output row at which the ten samples
+ *               of that row are stored.
+ *
+ * Pass 1 expands each of the 8 input columns to 10 values, so the
+ * workspace holds 10 rows of 8 entries; pass 2 expands each of those
+ * 10 rows to 10 output samples.
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*10]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Workspace row stride is 8 (one entry per input column). */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z1 = MULTIPLY(tmp1, FIX(1.144122806)); /* c4 */
+ z2 = MULTIPLY(tmp1, FIX(0.437016024)); /* c8 */
+ tmp10 = tmp0 + z1;
+ tmp11 = tmp0 - z2;
+
+ /* tmp22 is descaled right away because it is later combined with an */
+ /* odd term (tmp12 below) that carries only PASS1_BITS of scaling. */
+ tmp22 = DESCALE(tmp0 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
+ CONST_BITS-PASS1_BITS);
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+ tmp20 = tmp10 + tmp0;
+ tmp24 = tmp10 - tmp0;
+ tmp21 = tmp11 + tmp1;
+ tmp23 = tmp11 - tmp1;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp0 = z2 + z4;
+ tmp1 = z2 - z4;
+
+ tmp12 = MULTIPLY(tmp1, FIX(0.309016994)); /* (c3-c7)/2 */
+ /* With cK as defined above, c5 == 1: z3 is merely shifted to the */
+ /* CONST_BITS scale instead of being multiplied. */
+ tmp13 = z3 << CONST_BITS;
+
+ z2 = MULTIPLY(tmp0, FIX(0.951056516)); /* (c3+c7)/2 */
+ z4 = tmp13 + tmp12;
+
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.587785252)); /* (c1-c9)/2 */
+ /* The last term is tmp1 times one half at the CONST_BITS scale. */
+ z4 = tmp13 - tmp12 - (tmp1 << (CONST_BITS - 1));
+
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+ /* Multiplication-free term, scaled by PASS1_BITS only so that it */
+ /* matches the pre-descaled tmp22. */
+ tmp12 = (z1 - tmp1 - z3) << PASS1_BITS;
+
+ /* Final output stage */
+ /* Rows 2 and 7 use the already-descaled tmp22/tmp12 pair and are */
+ /* stored directly; all other rows are descaled here. */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) (tmp22 + tmp12);
+ wsptr[8*7] = (int) (tmp22 - tmp12);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 10 rows from work array, store into output array. */
+ /* Same 1-D kernel as pass 1, but every term is kept at the CONST_BITS */
+ /* scale until the single final descale. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 10; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+ tmp1 = (INT32) wsptr[4];
+ z1 = MULTIPLY(tmp1, FIX(1.144122806)); /* c4 */
+ z2 = MULTIPLY(tmp1, FIX(0.437016024)); /* c8 */
+ tmp10 = tmp0 + z1;
+ tmp11 = tmp0 - z2;
+
+ tmp22 = tmp0 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
+
+ z2 = (INT32) wsptr[2];
+ z3 = (INT32) wsptr[6];
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+ tmp20 = tmp10 + tmp0;
+ tmp24 = tmp10 - tmp0;
+ tmp21 = tmp11 + tmp1;
+ tmp23 = tmp11 - tmp1;
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ /* z3 is used only at the CONST_BITS scale below. */
+ z3 <<= CONST_BITS;
+ z4 = (INT32) wsptr[7];
+
+ tmp0 = z2 + z4;
+ tmp1 = z2 - z4;
+
+ tmp12 = MULTIPLY(tmp1, FIX(0.309016994)); /* (c3-c7)/2 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.951056516)); /* (c3+c7)/2 */
+ z4 = z3 + tmp12;
+
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.587785252)); /* (c1-c9)/2 */
+ z4 = z3 - tmp12 - (tmp1 << (CONST_BITS - 1));
+
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+ /* z3 already carries CONST_BITS, so only (z1 - tmp1) is shifted. */
+ tmp12 = ((z1 - tmp1) << CONST_BITS) - z3;
+
+ /* Final output stage */
+ /* The shift count removes this pass's CONST_BITS, the PASS1_BITS left */
+ /* over from pass 1, and 3 further bits; the masked result indexes the */
+ /* range-limit table to obtain the output sample. */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 11x11 output block.
+ *
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table via IDCT_range_limit().
+ *   compptr     component info; its dct_table is read as an array of
+ *               ISLOW_MULT_TYPE dequantization multipliers.
+ *   coef_block  input coefficients, addressed with a row stride of DCTSIZE;
+ *               rows 0..7 of columns 0..7 are read.
+ *   output_buf  array of output row pointers; rows 0..10 are written.
+ *   output_col  offset within each output row at which the eleven samples
+ *               of that row are stored.
+ *
+ * Pass 1 expands each of the 8 input columns to 11 values, so the
+ * workspace holds 11 rows of 8 entries; pass 2 expands each of those
+ * 11 rows to 11 output samples.
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*11]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Workspace row stride is 8 (one entry per input column). */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+ /* tmp10 holds the scaled DC term here; it is reused for an odd-part */
+ /* result further down, after the even part no longer needs it. */
+
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp10 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
+ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
+ z4 = z1 + z3;
+ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
+ z4 -= z2;
+ /* tmp25 is a shared partial sum folded into tmp20..tmp24 below; */
+ /* it is recomputed as the middle-row term at the end of the even part. */
+ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
+ tmp21 = tmp20 + tmp23 + tmp25 -
+ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
+ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+ tmp24 += tmp25;
+ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
+ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
+ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
+ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp11 = z1 + z2;
+ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
+ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
+ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
+ /* The input z1 is no longer needed; z1 becomes a scratch value. */
+ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
+ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
+ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
+ tmp11 += z1;
+ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
+ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
+ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
+ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
+
+ /* Final output stage */
+ /* Even term tmp2k pairs with odd term tmp1k to give rows k and 10-k; */
+ /* the middle row (5) comes from the even part alone. */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 11 rows from work array, store into output array. */
+ /* Same 1-D kernel as pass 1, reading 8 workspace entries per row. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 11; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp10 = (INT32) wsptr[0];
+ tmp10 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[4];
+ z3 = (INT32) wsptr[6];
+
+ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
+ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
+ z4 = z1 + z3;
+ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
+ z4 -= z2;
+ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
+ tmp21 = tmp20 + tmp23 + tmp25 -
+ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
+ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+ tmp24 += tmp25;
+ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
+ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
+ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
+ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp11 = z1 + z2;
+ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
+ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
+ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
+ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
+ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
+ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
+ tmp11 += z1;
+ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
+ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
+ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
+ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
+
+ /* Final output stage */
+ /* The shift count removes this pass's CONST_BITS, the PASS1_BITS left */
+ /* over from pass 1, and 3 further bits; the masked result indexes the */
+ /* range-limit table to obtain the output sample. */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block.
+ *
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table through IDCT_range_limit().
+ *   compptr     component description; its dct_table is read as an array
+ *               of ISLOW_MULT_TYPE multipliers passed to DEQUANTIZE().
+ *   coef_block  input coefficients; 8 columns are processed and each is
+ *               read at offsets DCTSIZE*0 .. DCTSIZE*7.
+ *   output_buf  array of output sample rows; rows 0..11 are written.
+ *   output_col  first column written in each output row; 12 consecutive
+ *               samples are stored per row.
+ *
+ * The work is split in two passes.  Pass 1 applies the 1-D kernel to each
+ * of the 8 input columns and stores 12 results per column in workspace[]
+ * (row stride 8).  Pass 2 applies the same kernel to each of the 12
+ * workspace rows (8 inputs each) and stores 12 range-limited samples.
+ *
+ * NOTE(review): FIX(), MULTIPLY(), DEQUANTIZE(), DESCALE(), CONST_BITS,
+ * PASS1_BITS and RANGE_MASK are defined outside this excerpt.  The scaling
+ * remarks below assume FIX()/MULTIPLY() work at a 2^CONST_BITS fixed-point
+ * scale and DESCALE(x,n) is a rounding right shift by n -- confirm against
+ * the definitions at the top of the file.
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*12]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * Each iteration handles one of the 8 columns; inptr, quantptr and wsptr
+ * all advance by one element per column.  Results are descaled by
+ * CONST_BITS-PASS1_BITS only, so (under the assumptions stated in the
+ * function header) the workspace keeps PASS1_BITS extra bits of precision.
+ */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: combines input rows 0, 2, 4 and 6 into tmp20..tmp25.
+ * tmp0, tmp1 and tmp10..tmp12 are scratch here and are reused by the
+ * odd part below.
+ */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp1 = MULTIPLY(z1, FIX(1.224744871)); /* c4 */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp1 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+ z1 <<= CONST_BITS;
+ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+ z2 <<= CONST_BITS;
+
+ tmp12 = z1 - z2;
+
+ tmp21 = tmp0 + tmp12;
+ tmp24 = tmp0 - tmp12;
+
+ tmp12 = tmp1 + z2;
+
+ tmp20 = tmp10 + tmp12;
+ tmp25 = tmp10 - tmp12;
+
+ tmp12 = tmp1 - z1 - z2;
+
+ tmp22 = tmp11 + tmp12;
+ tmp23 = tmp11 - tmp12;
+
+ /* Odd part: combines input rows 1, 3, 5 and 7 into tmp10..tmp15.
+ * z2 is overwritten twice (z2 - z3, then z1 + z3), so the statement
+ * order below matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp0 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
+ tmp1 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
+
+ tmp10 = z1 - z4;
+ z2 -= z3;
+ tmp12 = MULTIPLY(tmp10 + z2, FIX_0_541196100); /* c9 */
+ tmp11 = tmp12 + MULTIPLY(tmp10, FIX_0_765366865); /* c3-c9 */
+ tmp14 = tmp12 + MULTIPLY(z2, - FIX_1_847759065); /* c3+c9 */
+
+ z2 = z1 + z3;
+ tmp15 = MULTIPLY(z2 + z4, FIX(0.860918669)); /* c7 */
+ tmp12 = tmp15 + MULTIPLY(z2, FIX(0.261052384)); /* c5-c7 */
+ tmp10 = tmp12 + tmp0 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
+ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
+ tmp12 += tmp13 + tmp1 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+ tmp13 += tmp15 - tmp0 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+ tmp15 += tmp1 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
+
+ /* Final output stage: workspace row k receives even+odd and its mirror
+ * row 11-k receives even-odd, for k = 0..5.
+ */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 12 rows from work array, store into output array.
+ * Each workspace row holds 8 values (wsptr[0..7]); the same kernel as in
+ * pass 1 is applied, without dequantization.  Results are descaled by
+ * CONST_BITS+PASS1_BITS+3 (the extra 3 bits are presumably the overall
+ * 1/8 normalization -- confirm against the file header).
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 12; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: workspace columns 0, 2, 4 and 6 -> tmp20..tmp25 */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[4];
+ tmp1 = MULTIPLY(z1, FIX(1.224744871)); /* c4 */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ z1 = (INT32) wsptr[2];
+ tmp1 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+ z1 <<= CONST_BITS;
+ z2 = (INT32) wsptr[6];
+ z2 <<= CONST_BITS;
+
+ tmp12 = z1 - z2;
+
+ tmp21 = tmp0 + tmp12;
+ tmp24 = tmp0 - tmp12;
+
+ tmp12 = tmp1 + z2;
+
+ tmp20 = tmp10 + tmp12;
+ tmp25 = tmp10 - tmp12;
+
+ tmp12 = tmp1 - z1 - z2;
+
+ tmp22 = tmp11 + tmp12;
+ tmp23 = tmp11 - tmp12;
+
+ /* Odd part: workspace columns 1, 3, 5 and 7 -> tmp10..tmp15 */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp0 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
+ tmp1 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
+
+ tmp10 = z1 - z4;
+ z2 -= z3;
+ tmp12 = MULTIPLY(tmp10 + z2, FIX_0_541196100); /* c9 */
+ tmp11 = tmp12 + MULTIPLY(tmp10, FIX_0_765366865); /* c3-c9 */
+ tmp14 = tmp12 + MULTIPLY(z2, - FIX_1_847759065); /* c3+c9 */
+
+ z2 = z1 + z3;
+ tmp15 = MULTIPLY(z2 + z4, FIX(0.860918669)); /* c7 */
+ tmp12 = tmp15 + MULTIPLY(z2, FIX(0.261052384)); /* c5-c7 */
+ tmp10 = tmp12 + tmp0 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
+ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
+ tmp12 += tmp13 + tmp1 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+ tmp13 += tmp15 - tmp0 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+ tmp15 += tmp1 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
+
+ /* Final output stage: each descaled value is masked with RANGE_MASK and
+ * used as an index into range_limit[] to obtain the stored sample.
+ * Sample k is even+odd and its mirror 11-k is even-odd.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.
+ *
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table through IDCT_range_limit().
+ *   compptr     component description; its dct_table is read as an array
+ *               of ISLOW_MULT_TYPE multipliers passed to DEQUANTIZE().
+ *   coef_block  input coefficients; 8 columns are processed and each is
+ *               read at offsets DCTSIZE*0 .. DCTSIZE*7.
+ *   output_buf  array of output sample rows; rows 0..12 are written.
+ *   output_col  first column written in each output row; 13 consecutive
+ *               samples are stored per row.
+ *
+ * The work is split in two passes.  Pass 1 applies the 1-D kernel to each
+ * of the 8 input columns and stores 13 results per column in workspace[]
+ * (row stride 8).  Pass 2 applies the same kernel to each of the 13
+ * workspace rows (8 inputs each) and stores 13 range-limited samples.
+ * Because the output length is odd, the centre output (index 6) is
+ * produced from the even part alone.
+ *
+ * NOTE(review): FIX(), MULTIPLY(), DEQUANTIZE(), DESCALE(), CONST_BITS,
+ * PASS1_BITS and RANGE_MASK are defined outside this excerpt.  The scaling
+ * remarks below assume FIX()/MULTIPLY() work at a 2^CONST_BITS fixed-point
+ * scale and DESCALE(x,n) is a rounding right shift by n -- confirm against
+ * the definitions at the top of the file.
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*13]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * Each iteration handles one of the 8 columns; inptr, quantptr and wsptr
+ * all advance by one element per column.  Results are descaled by
+ * CONST_BITS-PASS1_BITS only, so (under the assumptions stated in the
+ * function header) the workspace keeps PASS1_BITS extra bits of precision.
+ */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: combines input rows 0, 2, 4 and 6 into tmp20..tmp26.
+ * z1 carries the row-0 term shifted to the CONST_BITS scale; tmp10..tmp13
+ * are scratch here and are reused by the odd part below.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ z1 <<= CONST_BITS;
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp10 = z3 + z4;
+ tmp11 = z3 - z4;
+
+ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
+
+ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
+ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
+
+ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
+
+ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
+ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
+
+ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
+
+ /* Odd part: combines input rows 1, 3, 5 and 7 into tmp10..tmp15.
+ * tmp15 first holds the plain sum z1 + z4 and is later replaced by a
+ * product of that sum; z1 is overwritten with a product near the end,
+ * so the statement order below matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
+ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
+ tmp15 = z1 + z4;
+ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
+ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
+ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
+ tmp11 += tmp14;
+ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
+ tmp12 += tmp14;
+ tmp13 += tmp14;
+ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
+ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
+ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
+ tmp14 += z1;
+ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
+ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
+
+ /* Final output stage: workspace row k receives even+odd and its mirror
+ * row 12-k receives even-odd, for k = 0..5; the centre row 6 takes
+ * tmp26 from the even part only.
+ */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*12] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp26, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 13 rows from work array, store into output array.
+ * Each workspace row holds 8 values (wsptr[0..7]); the same kernel as in
+ * pass 1 is applied, without dequantization.  Results are descaled by
+ * CONST_BITS+PASS1_BITS+3 (the extra 3 bits are presumably the overall
+ * 1/8 normalization -- confirm against the file header).
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 13; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: workspace columns 0, 2, 4 and 6 -> tmp20..tmp26 */
+
+ z1 = (INT32) wsptr[0];
+ z1 <<= CONST_BITS;
+
+ z2 = (INT32) wsptr[2];
+ z3 = (INT32) wsptr[4];
+ z4 = (INT32) wsptr[6];
+
+ tmp10 = z3 + z4;
+ tmp11 = z3 - z4;
+
+ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
+
+ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
+ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
+
+ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
+
+ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
+ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
+ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
+
+ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
+
+ /* Odd part: workspace columns 1, 3, 5 and 7 -> tmp10..tmp15 */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
+ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
+ tmp15 = z1 + z4;
+ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
+ tmp10 = tmp11 + tmp12 + tmp13 -
+ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
+ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
+ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
+ tmp11 += tmp14;
+ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
+ tmp12 += tmp14;
+ tmp13 += tmp14;
+ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
+ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
+ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
+ tmp14 += z1;
+ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
+ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
+
+ /* Final output stage: each descaled value is masked with RANGE_MASK and
+ * used as an index into range_limit[] to obtain the stored sample.
+ * Sample k is even+odd, its mirror 12-k is even-odd, and the centre
+ * sample 6 is tmp26 alone.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.
+ *
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table through IDCT_range_limit().
+ *   compptr     component description; its dct_table is read as an array
+ *               of ISLOW_MULT_TYPE multipliers passed to DEQUANTIZE().
+ *   coef_block  input coefficients; 8 columns are processed and each is
+ *               read at offsets DCTSIZE*0 .. DCTSIZE*7.
+ *   output_buf  array of output sample rows; rows 0..13 are written.
+ *   output_col  first column written in each output row; 14 consecutive
+ *               samples are stored per row.
+ *
+ * The work is split in two passes.  Pass 1 applies the 1-D kernel to each
+ * of the 8 input columns and stores 14 results per column in workspace[]
+ * (row stride 8).  Pass 2 applies the same kernel to each of the 14
+ * workspace rows (8 inputs each) and stores 14 range-limited samples.
+ *
+ * Outputs 3 and 10 are handled specially: their odd-part term is a plain
+ * signed sum of the four odd inputs (no multiplication), so in pass 1 the
+ * even term tmp23 is descaled on its own and the odd term tmp13 is formed
+ * directly with a PASS1_BITS shift; in pass 2 the odd term is shifted up
+ * to the CONST_BITS scale and descaled together with tmp23 like every
+ * other output.
+ *
+ * NOTE(review): FIX(), MULTIPLY(), DEQUANTIZE(), DESCALE(), CONST_BITS,
+ * PASS1_BITS and RANGE_MASK are defined outside this excerpt.  The scaling
+ * remarks here assume FIX()/MULTIPLY() work at a 2^CONST_BITS fixed-point
+ * scale and DESCALE(x,n) is a rounding right shift by n -- confirm against
+ * the definitions at the top of the file.
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*14]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * Each iteration handles one of the 8 columns; inptr, quantptr and wsptr
+ * all advance by one element per column.  Results are descaled by
+ * CONST_BITS-PASS1_BITS only, so (under the assumptions stated in the
+ * function header) the workspace keeps PASS1_BITS extra bits of precision.
+ */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: combines input rows 0, 2, 4 and 6 into tmp20..tmp26.
+ * tmp23 is descaled right here, unlike the other even terms, because
+ * it is later combined with an odd term that is already at the
+ * workspace scale.  tmp10..tmp15 are scratch and reused by the odd part.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ z1 <<= CONST_BITS;
+ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
+ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
+ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
+
+ tmp10 = z1 + z2;
+ tmp11 = z1 + z3;
+ tmp12 = z1 - z4;
+
+ tmp23 = DESCALE(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+ CONST_BITS-PASS1_BITS);
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
+
+ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
+ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
+
+ tmp20 = tmp10 + tmp13;
+ tmp26 = tmp10 - tmp13;
+ tmp21 = tmp11 + tmp14;
+ tmp25 = tmp11 - tmp14;
+ tmp22 = tmp12 + tmp15;
+ tmp24 = tmp12 - tmp15;
+
+ /* Odd part: combines input rows 1, 3, 5 and 7 into tmp10..tmp16.
+ * tmp13 temporarily holds the row-7 term at the CONST_BITS scale and
+ * is recomputed at the end; z1 and z4 are both overwritten along the
+ * way, so the statement order below matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+ tmp13 = z4 << CONST_BITS;
+
+ tmp14 = z1 + z3;
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
+ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
+ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
+ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
+ z1 -= z2;
+ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
+ tmp16 += tmp15;
+ z1 += z4;
+ z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+ tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
+ tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
+ z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
+ tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+ tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
+
+ tmp13 = (z1 - z3) << PASS1_BITS; /* multiplier-free term, formed at workspace scale */
+
+ /* Final output stage: workspace row k receives even+odd and its mirror
+ * row 13-k receives even-odd, for k = 0..6.
+ */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*13] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*12] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) (tmp23 + tmp13); /* both terms already descaled */
+ wsptr[8*10] = (int) (tmp23 - tmp13); /* both terms already descaled */
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 14 rows from work array, store into output array.
+ * Each workspace row holds 8 values (wsptr[0..7]); the same kernel as in
+ * pass 1 is applied, without dequantization.  Results are descaled by
+ * CONST_BITS+PASS1_BITS+3 (the extra 3 bits are presumably the overall
+ * 1/8 normalization -- confirm against the file header).
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 14; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: workspace columns 0, 2, 4 and 6 -> tmp20..tmp26.
+ * Unlike pass 1, tmp23 is left undescaled here and goes through the
+ * common DESCALE in the output stage.
+ */
+
+ z1 = (INT32) wsptr[0];
+ z1 <<= CONST_BITS;
+ z4 = (INT32) wsptr[4];
+ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
+ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
+ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
+
+ tmp10 = z1 + z2;
+ tmp11 = z1 + z3;
+ tmp12 = z1 - z4;
+
+ tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[6];
+
+ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
+
+ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
+ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
+
+ tmp20 = tmp10 + tmp13;
+ tmp26 = tmp10 - tmp13;
+ tmp21 = tmp11 + tmp14;
+ tmp25 = tmp11 - tmp14;
+ tmp22 = tmp12 + tmp15;
+ tmp24 = tmp12 - tmp15;
+
+ /* Odd part: workspace columns 1, 3, 5 and 7 -> tmp10..tmp16.
+ * Here z4 itself is shifted to the CONST_BITS scale and tmp13 serves
+ * as the scratch register, the reverse of the roles used in pass 1.
+ */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+ z4 <<= CONST_BITS; /* used below without a multiplier */
+
+ tmp14 = z1 + z3;
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
+ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
+ tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
+ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
+ z1 -= z2;
+ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
+ tmp16 += tmp15;
+ tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
+ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
+ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
+ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
+ tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+ tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
+
+ tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* multiplier-free term at CONST_BITS scale */
+
+ /* Final output stage: each descaled value is masked with RANGE_MASK and
+ * used as an index into range_limit[] to obtain the stored sample.
+ * Sample k is even+odd and its mirror 13-k is even-odd.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[13] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26 + tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp26 - tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block.
+ *
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ *
+ * Parameters:
+ *   cinfo       decompressor state; used here only to obtain the
+ *               range-limit table through IDCT_range_limit().
+ *   compptr     component description; its dct_table is read as an array
+ *               of ISLOW_MULT_TYPE multipliers passed to DEQUANTIZE().
+ *   coef_block  input coefficients; 8 columns are processed and each is
+ *               read at offsets DCTSIZE*0 .. DCTSIZE*7.
+ *   output_buf  array of output sample rows; rows 0..14 are written.
+ *   output_col  first column written in each output row; 15 consecutive
+ *               samples are stored per row.
+ *
+ * The work is split in two passes.  Pass 1 applies the 1-D kernel to each
+ * of the 8 input columns and stores 15 results per column in workspace[]
+ * (row stride 8).  Pass 2 applies the same kernel to each of the 15
+ * workspace rows (8 inputs each) and stores 15 range-limited samples.
+ * Because the output length is odd, the centre output (index 7) is
+ * produced from the even part alone.
+ *
+ * NOTE(review): FIX(), MULTIPLY(), DEQUANTIZE(), DESCALE(), CONST_BITS,
+ * PASS1_BITS and RANGE_MASK are defined outside this excerpt.  The scaling
+ * remarks below assume FIX()/MULTIPLY() work at a 2^CONST_BITS fixed-point
+ * scale and DESCALE(x,n) is a rounding right shift by n -- confirm against
+ * the definitions at the top of the file.
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*15]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * Each iteration handles one of the 8 columns; inptr, quantptr and wsptr
+ * all advance by one element per column.  Results are descaled by
+ * CONST_BITS-PASS1_BITS only, so (under the assumptions stated in the
+ * function header) the workspace keeps PASS1_BITS extra bits of precision.
+ */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: combines input rows 0, 2, 4 and 6 into tmp20..tmp27.
+ * z1..z4 and tmp10..tmp13 are repeatedly overwritten (z4 becomes
+ * z2 - z3, z3 becomes z2 + z3, tmp11 is doubled in place), so the
+ * statement order below matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ z1 <<= CONST_BITS;
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+ tmp12 = z1 - tmp10;
+ tmp13 = z1 + tmp11;
+ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
+
+ z4 = z2 - z3;
+ z3 += z2;
+ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
+
+ tmp20 = tmp13 + tmp10 + tmp11;
+ tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+ tmp25 = tmp13 - tmp10 - tmp11;
+ tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+ tmp21 = tmp12 + tmp10 + tmp11;
+ tmp24 = tmp13 - tmp10 + tmp11;
+ tmp11 += tmp11;
+ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
+ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
+
+ /* Odd part: combines input rows 1, 3, 5 and 7 into tmp10..tmp16.
+ * The row-5 input is loaded into z4 only long enough to form its c5
+ * product in z3; z4 is then reloaded with the row-7 input.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp13 = z2 - z4;
+ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
+ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
+ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
+
+ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
+ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
+ z2 = z1 - z4;
+ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
+
+ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
+ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
+ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
+ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
+
+ /* Final output stage: workspace row k receives even+odd and its mirror
+ * row 14-k receives even-odd, for k = 0..6; the centre row 7 takes
+ * tmp27 from the even part only.
+ */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*14] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*13] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*12] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp27, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 15 rows from work array, store into output array.
+ * Each workspace row holds 8 values (wsptr[0..7]); the same kernel as in
+ * pass 1 is applied, without dequantization.  Results are descaled by
+ * CONST_BITS+PASS1_BITS+3 (the extra 3 bits are presumably the overall
+ * 1/8 normalization -- confirm against the file header).
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 15; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: workspace columns 0, 2, 4 and 6 -> tmp20..tmp27 */
+
+ z1 = (INT32) wsptr[0];
+ z1 <<= CONST_BITS;
+
+ z2 = (INT32) wsptr[2];
+ z3 = (INT32) wsptr[4];
+ z4 = (INT32) wsptr[6];
+
+ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+ tmp12 = z1 - tmp10;
+ tmp13 = z1 + tmp11;
+ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
+
+ z4 = z2 - z3;
+ z3 += z2;
+ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
+
+ tmp20 = tmp13 + tmp10 + tmp11;
+ tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+ tmp25 = tmp13 - tmp10 - tmp11;
+ tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+ tmp21 = tmp12 + tmp10 + tmp11;
+ tmp24 = tmp13 - tmp10 + tmp11;
+ tmp11 += tmp11;
+ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
+ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
+
+ /* Odd part: workspace columns 1, 3, 5 and 7 -> tmp10..tmp16 */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z4 = (INT32) wsptr[5];
+ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
+ z4 = (INT32) wsptr[7];
+
+ tmp13 = z2 - z4;
+ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
+ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
+ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
+
+ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
+ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
+ z2 = z1 - z4;
+ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
+
+ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
+ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
+ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
+ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
+
+ /* Final output stage: each descaled value is masked with RANGE_MASK and
+ * used as an index into range_limit[] to obtain the stored sample.
+ * Sample k is even+odd, its mirror 14-k is even-odd, and the centre
+ * sample 7 is tmp27 alone.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[14] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[13] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26 + tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp26 - tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp27,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block.
+ *
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*16]; /* buffers data between passes */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp0 - tmp2;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+ z3 = z1 - z2;
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+ tmp20 = tmp10 + tmp0;
+ tmp27 = tmp10 - tmp0;
+ tmp21 = tmp12 + tmp1;
+ tmp26 = tmp12 - tmp1;
+ tmp22 = tmp13 + tmp2;
+ tmp25 = tmp13 - tmp2;
+ tmp23 = tmp11 + tmp3;
+ tmp24 = tmp11 - tmp3;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp11 = z1 + z3;
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
+ tmp13 = tmp10 + tmp11 + tmp12 -
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
+ z2 += z4;
+ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
+ tmp1 += z1;
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
+ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
+ tmp12 += z2;
+ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+ tmp2 += z2;
+ tmp3 += z2;
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
+ tmp10 += z2;
+ tmp11 += z2;
+
+ /* Final output stage */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*15] = (int) DESCALE(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*14] = (int) DESCALE(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*13] = (int) DESCALE(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*12] = (int) DESCALE(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 16 rows from work array, store into output array. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 16; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[4];
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp0 - tmp2;
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[6];
+ z3 = z1 - z2;
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+ tmp20 = tmp10 + tmp0;
+ tmp27 = tmp10 - tmp0;
+ tmp21 = tmp12 + tmp1;
+ tmp26 = tmp12 - tmp1;
+ tmp22 = tmp13 + tmp2;
+ tmp25 = tmp13 - tmp2;
+ tmp23 = tmp11 + tmp3;
+ tmp24 = tmp11 - tmp3;
+
+ /* Odd part */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp11 = z1 + z3;
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
+ tmp13 = tmp10 + tmp11 + tmp12 -
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
+ z2 += z4;
+ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
+ tmp1 += z1;
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
+ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
+ tmp12 += z2;
+ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+ tmp2 += z2;
+ tmp3 += z2;
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
+ tmp10 += z2;
+ tmp11 += z2;
+
+ /* Final output stage */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[15] = range_limit[(int) DESCALE(tmp20 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[14] = range_limit[(int) DESCALE(tmp21 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[13] = range_limit[(int) DESCALE(tmp22 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp23 - tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp24 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp25 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp26 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp27 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp27 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x8 output block: 8 output rows of 16 samples each.
+ * Coefficients are dequantized with the table at compptr->dct_table.
+ * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, /* quantized input coefficients */
+ JSAMPARRAY output_buf, JDIMENSION output_col) /* row r is written at output_buf[r] + output_col */
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+ INT32 z1, z2, z3, z4, z5;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
+ int ctr;
+ int workspace[8*8]; /* buffers data between passes: written by columns, read by rows */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = DCTSIZE; ctr > 0; ctr--) { /* one iteration per input column */
+ /* Due to quantization, we will usually find that many of the input
+ * coefficients are zero, especially the AC terms. We can exploit this
+ * by short-circuiting the IDCT calculation for any column in which all
+ * the AC terms are zero. In that case each output is equal to the
+ * DC coefficient (with scale factor as needed).
+ * With typical images and quantization tables, half or more of the
+ * column DCT calculations can be simplified this way.
+ */
+
+ if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+ inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+ inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+ inptr[DCTSIZE*7] == 0) {
+ /* AC terms all zero: all eight workspace rows of this column get the scaled DC term */
+ int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+ wsptr[DCTSIZE*0] = dcval;
+ wsptr[DCTSIZE*1] = dcval;
+ wsptr[DCTSIZE*2] = dcval;
+ wsptr[DCTSIZE*3] = dcval;
+ wsptr[DCTSIZE*4] = dcval;
+ wsptr[DCTSIZE*5] = dcval;
+ wsptr[DCTSIZE*6] = dcval;
+ wsptr[DCTSIZE*7] = dcval;
+
+ inptr++; /* advance pointers to next column */
+ quantptr++;
+ wsptr++;
+ continue;
+ }
+
+ /* Even part: reverse the even part of the forward DCT. */
+ /* The rotator is sqrt(2)*c(-6). */
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+ tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+
+ tmp0 = (z2 + z3) << CONST_BITS;
+ tmp1 = (z2 - z3) << CONST_BITS;
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ /* Odd part per figure 8; the matrix is unitary and hence its
+ * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
+ */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+ z1 = tmp0 + tmp3;
+ z2 = tmp1 + tmp2;
+ z3 = tmp0 + tmp2;
+ z4 = tmp1 + tmp3;
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+ z3 += z5;
+ z4 += z5;
+
+ tmp0 += z1 + z3;
+ tmp1 += z2 + z4;
+ tmp2 += z2 + z3;
+ tmp3 += z1 + z4;
+
+ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3; rows k and 7-k are the sum and difference of one even/odd pair */
+
+ wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+ inptr++; /* advance pointers to next column */
+ quantptr++;
+ wsptr++;
+ }
+
+ /* Pass 2: process 8 rows from work array, store into output array.
+ * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++) { /* one iteration per output row */
+ outptr = output_buf[ctr] + output_col; /* first of the 16 samples written for this row */
+
+ /* Even part: built from workspace entries 0, 2, 4 and 6 of this row */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[4];
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp0 - tmp2;
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[6];
+ z3 = z1 - z2;
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+ tmp20 = tmp10 + tmp0;
+ tmp27 = tmp10 - tmp0;
+ tmp21 = tmp12 + tmp1;
+ tmp26 = tmp12 - tmp1;
+ tmp22 = tmp13 + tmp2;
+ tmp25 = tmp13 - tmp2;
+ tmp23 = tmp11 + tmp3;
+ tmp24 = tmp11 - tmp3;
+
+ /* Odd part: built from workspace entries 1, 3, 5 and 7 of this row */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp11 = z1 + z3;
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
+ tmp13 = tmp10 + tmp11 + tmp12 -
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
+ z2 += z4;
+ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
+ tmp1 += z1;
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
+ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
+ tmp12 += z2;
+ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+ tmp2 += z2;
+ tmp3 += z2;
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
+ tmp10 += z2;
+ tmp11 += z2;
+
+ /* Final output stage: samples k and 15-k are the sum and difference of one even/odd pair, descaled, masked and range-limited */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[15] = range_limit[(int) DESCALE(tmp20 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[14] = range_limit[(int) DESCALE(tmp21 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[13] = range_limit[(int) DESCALE(tmp22 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp23 - tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp24 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp25 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp26 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp27 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp27 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row; NOTE(review): pass 1 strides by DCTSIZE, so this assumes DCTSIZE == 8 */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x7 output block: 7 output rows of 14 samples each.
+ * Only coefficient rows 0..6 of each input column are read.
+ * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, /* quantized input coefficients */
+ JSAMPARRAY output_buf, JDIMENSION output_col) /* row r is written at output_buf[r] + output_col */
+{
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
+ int ctr;
+ int workspace[8*7]; /* buffers data between passes: 7 rows of 8 values */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one iteration per input column */
+ /* Even part: built from coefficient rows 0, 2, 4 and 6 */
+
+ tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp23 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
+ tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
+ tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+ tmp10 = z1 + z3;
+ z2 -= tmp10;
+ tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+ tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
+ tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
+ tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
+
+ /* Odd part: built from coefficient rows 1, 3 and 5 */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+ tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
+ tmp10 = tmp11 - tmp12;
+ tmp11 += tmp12;
+ tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
+ tmp11 += tmp12;
+ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
+ tmp10 += z2;
+ tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
+
+ /* Final output stage: rows k and 6-k are the sum and difference of one even/odd pair */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23, CONST_BITS-PASS1_BITS); /* middle row: even part only */
+ }
+
+ /* Pass 2: process 7 rows from work array, store into output array.
+ * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 7; ctr++) { /* one iteration per output row */
+ outptr = output_buf[ctr] + output_col; /* first of the 14 samples written for this row */
+
+ /* Even part: built from workspace entries 0, 2, 4 and 6 of this row */
+
+ z1 = (INT32) wsptr[0];
+ z1 <<= CONST_BITS;
+ z4 = (INT32) wsptr[4];
+ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
+ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
+ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
+
+ tmp10 = z1 + z2;
+ tmp11 = z1 + z3;
+ tmp12 = z1 - z4;
+
+ tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
+
+ z1 = (INT32) wsptr[2];
+ z2 = (INT32) wsptr[6];
+
+ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
+
+ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
+ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
+
+ tmp20 = tmp10 + tmp13;
+ tmp26 = tmp10 - tmp13;
+ tmp21 = tmp11 + tmp14;
+ tmp25 = tmp11 - tmp14;
+ tmp22 = tmp12 + tmp15;
+ tmp24 = tmp12 - tmp15;
+
+ /* Odd part: built from workspace entries 1, 3, 5 and 7 of this row */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+ z4 <<= CONST_BITS;
+
+ tmp14 = z1 + z3;
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
+ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
+ tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
+ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
+ z1 -= z2;
+ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
+ tmp16 += tmp15;
+ tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
+ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
+ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
+ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
+ tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+ tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
+
+ tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* z1 holds wsptr[1] - wsptr[3] here, from the "z1 -= z2" above */
+
+ /* Final output stage: samples k and 13-k are the sum and difference of one even/odd pair, descaled, masked and range-limited */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[13] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[12] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp26 + tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp26 - tmp16,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x6 output block: 6 output rows of 12 samples each.
+ * Only coefficient rows 0..5 of each input column are read.
+ * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, /* quantized input coefficients */
+ JSAMPARRAY output_buf, JDIMENSION output_col) /* row r is written at output_buf[r] + output_col */
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
+ int ctr;
+ int workspace[8*6]; /* buffers data between passes: 6 rows of 8 values */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one iteration per input column */
+ /* Even part: built from coefficient rows 0, 2 and 4 */
+
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp10 <<= CONST_BITS;
+ tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
+ tmp11 = tmp10 + tmp20;
+ tmp21 = DESCALE(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); /* descaled here, so rows 1 and 4 need no DESCALE below */
+ tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
+ tmp20 = tmp11 + tmp10;
+ tmp22 = tmp11 - tmp10;
+
+ /* Odd part: built from coefficient rows 1, 3 and 5 */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+ tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+ tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+ tmp11 = (z1 - z2 - z3) << PASS1_BITS; /* shifted by PASS1_BITS only, to pair with the already descaled tmp21 */
+
+ /* Final output stage: rows k and 5-k are the sum and difference of one even/odd pair */
+
+ wsptr[8*0] = DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) (tmp21 + tmp11);
+ wsptr[8*4] = (int) (tmp21 - tmp11);
+ wsptr[8*2] = DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 6 rows from work array, store into output array.
+ * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++) { /* one iteration per output row */
+ outptr = output_buf[ctr] + output_col; /* first of the 12 samples written for this row */
+
+ /* Even part: built from workspace entries 0, 2, 4 and 6 of this row */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+
+ z1 = (INT32) wsptr[4];
+ tmp1 = MULTIPLY(z1, FIX(1.224744871)); /* c4 */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ z1 = (INT32) wsptr[2];
+ tmp1 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+ z1 <<= CONST_BITS;
+ z2 = (INT32) wsptr[6];
+ z2 <<= CONST_BITS;
+
+ tmp12 = z1 - z2;
+
+ tmp21 = tmp0 + tmp12;
+ tmp24 = tmp0 - tmp12;
+
+ tmp12 = tmp1 + z2;
+
+ tmp20 = tmp10 + tmp12;
+ tmp25 = tmp10 - tmp12;
+
+ tmp12 = tmp1 - z1 - z2;
+
+ tmp22 = tmp11 + tmp12;
+ tmp23 = tmp11 - tmp12;
+
+ /* Odd part: built from workspace entries 1, 3, 5 and 7 of this row */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z4 = (INT32) wsptr[7];
+
+ tmp0 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
+ tmp1 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
+
+ tmp10 = z1 - z4;
+ z2 -= z3;
+ tmp12 = MULTIPLY(tmp10 + z2, FIX_0_541196100); /* c9 */
+ tmp11 = tmp12 + MULTIPLY(tmp10, FIX_0_765366865); /* c3-c9 */
+ tmp14 = tmp12 + MULTIPLY(z2, - FIX_1_847759065); /* c3+c9 */
+
+ z2 = z1 + z3;
+ tmp15 = MULTIPLY(z2 + z4, FIX(0.860918669)); /* c7 */
+ tmp12 = tmp15 + MULTIPLY(z2, FIX(0.261052384)); /* c5-c7 */
+ tmp10 = tmp12 + tmp0 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
+ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
+ tmp12 += tmp13 + tmp1 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+ tmp13 += tmp15 - tmp0 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+ tmp15 += tmp1 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
+
+ /* Final output stage: samples k and 11-k are the sum and difference of one even/odd pair, descaled, masked and range-limited */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[11] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[10] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp25 + tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp25 - tmp15,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x5 output block: 5 output rows of 10 samples each.
+ * Only coefficient rows 0..4 of each input column are read.
+ * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, /* quantized input coefficients */
+ JSAMPARRAY output_buf, JDIMENSION output_col) /* row r is written at output_buf[r] + output_col */
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
+ int ctr;
+ int workspace[8*5]; /* buffers data between passes: 5 rows of 8 values */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one iteration per input column */
+ /* Even part: built from coefficient rows 0, 2 and 4 */
+
+ tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp12 <<= CONST_BITS;
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+ z3 = tmp12 + z2;
+ tmp10 = z3 + z1;
+ tmp11 = z3 - z1;
+ tmp12 -= z2 << 2;
+
+ /* Odd part: built from coefficient rows 1 and 3 */
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
+
+ /* Final output stage: rows k and 4-k are the sum and difference of one even/odd pair */
+
+ wsptr[8*0] = DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = DESCALE(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = DESCALE(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = DESCALE(tmp12, CONST_BITS-PASS1_BITS); /* middle row: even part only */
+ }
+
+ /* Pass 2: process 5 rows from work array, store into output array.
+ * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 5; ctr++) { /* one iteration per output row */
+ outptr = output_buf[ctr] + output_col; /* first of the 10 samples written for this row */
+
+ /* Even part: built from workspace entries 0, 2, 4 and 6 of this row */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+ tmp1 = (INT32) wsptr[4];
+ z1 = MULTIPLY(tmp1, FIX(1.144122806)); /* c4 */
+ z2 = MULTIPLY(tmp1, FIX(0.437016024)); /* c8 */
+ tmp10 = tmp0 + z1;
+ tmp11 = tmp0 - z2;
+
+ tmp22 = tmp0 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
+
+ z2 = (INT32) wsptr[2];
+ z3 = (INT32) wsptr[6];
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+ tmp20 = tmp10 + tmp0;
+ tmp24 = tmp10 - tmp0;
+ tmp21 = tmp11 + tmp1;
+ tmp23 = tmp11 - tmp1;
+
+ /* Odd part: built from workspace entries 1, 3, 5 and 7 of this row */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ z3 <<= CONST_BITS;
+ z4 = (INT32) wsptr[7];
+
+ tmp0 = z2 + z4;
+ tmp1 = z2 - z4;
+
+ tmp12 = MULTIPLY(tmp1, FIX(0.309016994)); /* (c3-c7)/2 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.951056516)); /* (c3+c7)/2 */
+ z4 = z3 + tmp12;
+
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.587785252)); /* (c1-c9)/2 */
+ z4 = z3 - tmp12 - (tmp1 << (CONST_BITS - 1));
+
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+ tmp12 = ((z1 - tmp1) << CONST_BITS) - z3;
+
+ /* Final output stage: samples k and 9-k are the sum and difference of one even/odd pair, descaled, masked and range-limited */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[9] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[8] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp23 + tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp23 - tmp13,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp24 + tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp24 - tmp14,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 8; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Dequantize one coefficient block and apply the inverse DCT to it,
+ * yielding an output block that is 8 samples wide and 4 rows high.
+ *
+ * Columns go through a 4-point IDCT first (pass 1); the resulting rows
+ * then go through the regular 8-point IDCT (pass 2).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int workspace[8*4];           /* pass 1 results: 4 rows of 8 values */
+  int * row;
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  JSAMPROW dst;
+  INT32 in0, in1, in2, in3;     /* dequantized column inputs */
+  INT32 col_sum, col_diff;
+  INT32 rot, rot_lo, rot_hi;    /* shared rotation and its two outputs */
+  INT32 w1, w2, w3, w5, w6, w7; /* workspace row inputs */
+  INT32 dc_sum, dc_diff;
+  INT32 even0, even1, even2, even3;
+  INT32 odd1, odd3, odd5, odd7;
+  INT32 s71, s53, s73, s51, shared;
+  INT32 pix[8];                 /* unscaled results for one output row */
+  int col, r, k;
+  SHIFT_TEMPS
+
+  /* Pass 1: 4-point IDCT down each of the 8 columns.
+   * cK stands for sqrt(2) * cos(K*pi/16).  Results keep PASS1_BITS
+   * extra bits of precision, which pass 2 removes again.
+   */
+  for (col = 0; col < 8; col++) {
+    /* Even inputs (rows 0 and 2) */
+
+    in0 = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtbl[DCTSIZE*0 + col]);
+    in2 = DEQUANTIZE(coef_block[DCTSIZE*2 + col], qtbl[DCTSIZE*2 + col]);
+
+    col_sum = (in0 + in2) << PASS1_BITS;
+    col_diff = (in0 - in2) << PASS1_BITS;
+
+    /* Odd inputs (rows 1 and 3): the rotation used by the even part
+     * of the 8x8 LL&M IDCT.
+     */
+
+    in1 = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtbl[DCTSIZE*1 + col]);
+    in3 = DEQUANTIZE(coef_block[DCTSIZE*3 + col], qtbl[DCTSIZE*3 + col]);
+
+    rot = MULTIPLY(in1 + in3, FIX_0_541196100);               /* c6 */
+    rot_lo = DESCALE(rot + MULTIPLY(in3, - FIX_1_847759065),  /* -(c2+c6) */
+                     CONST_BITS-PASS1_BITS);
+    rot_hi = DESCALE(rot + MULTIPLY(in1, FIX_0_765366865),    /* c2-c6 */
+                     CONST_BITS-PASS1_BITS);
+
+    /* Store the four column results, top to bottom */
+
+    workspace[8*0 + col] = (int) (col_sum + rot_hi);
+    workspace[8*1 + col] = (int) (col_diff + rot_lo);
+    workspace[8*2 + col] = (int) (col_diff - rot_lo);
+    workspace[8*3 + col] = (int) (col_sum - rot_hi);
+  }
+
+  /* Pass 2: 8-point IDCT along each of the 4 workspace rows.
+   * The final descale removes the factor of 8 == 2**3 as well as the
+   * PASS1_BITS scaling left over from pass 1.
+   */
+  for (r = 0; r < 4; r++) {
+    row = workspace + DCTSIZE*r;
+    dst = output_buf[r] + output_col;
+
+    /* Even part; the rotator is sqrt(2)*c(-6). */
+
+    w2 = (INT32) row[2];
+    w6 = (INT32) row[6];
+
+    rot = MULTIPLY(w2 + w6, FIX_0_541196100);
+    rot_lo = rot + MULTIPLY(w6, - FIX_1_847759065);
+    rot_hi = rot + MULTIPLY(w2, FIX_0_765366865);
+
+    dc_sum = ((INT32) row[0] + (INT32) row[4]) << CONST_BITS;
+    dc_diff = ((INT32) row[0] - (INT32) row[4]) << CONST_BITS;
+
+    even0 = dc_sum + rot_hi;
+    even3 = dc_sum - rot_hi;
+    even1 = dc_diff + rot_lo;
+    even2 = dc_diff - rot_lo;
+
+    /* Odd part, working on inputs 7, 5, 3 and 1. */
+
+    w7 = (INT32) row[7];
+    w5 = (INT32) row[5];
+    w3 = (INT32) row[3];
+    w1 = (INT32) row[1];
+
+    s71 = w7 + w1;
+    s53 = w5 + w3;
+    s73 = w7 + w3;
+    s51 = w5 + w1;
+    shared = MULTIPLY(s73 + s51, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    odd7 = MULTIPLY(w7, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    odd5 = MULTIPLY(w5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    odd3 = MULTIPLY(w3, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    odd1 = MULTIPLY(w1, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    s71 = MULTIPLY(s71, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    s53 = MULTIPLY(s53, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    s73 = MULTIPLY(s73, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    s51 = MULTIPLY(s51, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    s73 += shared;
+    s51 += shared;
+
+    odd7 += s71 + s73;
+    odd5 += s53 + s51;
+    odd3 += s53 + s73;
+    odd1 += s71 + s51;
+
+    /* Combine: sample k and sample 7-k are the sum and difference of
+     * one even term and one odd term.
+     */
+
+    pix[0] = even0 + odd1;
+    pix[7] = even0 - odd1;
+    pix[1] = even1 + odd3;
+    pix[6] = even1 - odd3;
+    pix[2] = even2 + odd5;
+    pix[5] = even2 - odd5;
+    pix[3] = even3 + odd7;
+    pix[4] = even3 - odd7;
+
+    /* Descale and range-limit the 8 samples of this row */
+
+    for (k = 0; k < 8; k++) {
+      dst[k] = limit[(int) DESCALE(pix[k], CONST_BITS+PASS1_BITS+3)
+                     & RANGE_MASK];
+    }
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and apply the inverse DCT to it,
+ * yielding a reduced-size output block 6 samples wide and 3 rows high.
+ *
+ * Columns go through a 3-point IDCT (pass 1), rows through a 6-point
+ * IDCT (pass 2).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int workspace[6*3];           /* pass 1 results: 3 rows of 6 values */
+  int * row;
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  JSAMPROW dst;
+  INT32 dc, mid, odd;           /* scaled column terms */
+  INT32 outer, inner;
+  INT32 base, t4, t2;           /* row even-part terms */
+  INT32 even0, even1, even2;
+  INT32 w1, w3, w5, rot;        /* row odd-part inputs and shared product */
+  INT32 odd0, odd1, odd2;
+  int col, r;
+  SHIFT_TEMPS
+
+  /* Pass 1: 3-point IDCT down each of the 6 columns.
+   * cK stands for sqrt(2) * cos(K*pi/6).
+   */
+  for (col = 0; col < 6; col++) {
+    /* Even inputs (rows 0 and 2) */
+
+    dc = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtbl[DCTSIZE*0 + col]);
+    dc <<= CONST_BITS;
+    mid = DEQUANTIZE(coef_block[DCTSIZE*2 + col], qtbl[DCTSIZE*2 + col]);
+    mid = MULTIPLY(mid, FIX(0.707106781));      /* c2 */
+    outer = dc + mid;
+    inner = dc - mid - mid;
+
+    /* Odd input (row 1) */
+
+    odd = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtbl[DCTSIZE*1 + col]);
+    odd = MULTIPLY(odd, FIX(1.224744871));      /* c1 */
+
+    /* Store the three column results, top to bottom */
+
+    workspace[6*0 + col] = (int) DESCALE(outer + odd, CONST_BITS-PASS1_BITS);
+    workspace[6*1 + col] = (int) DESCALE(inner, CONST_BITS-PASS1_BITS);
+    workspace[6*2 + col] = (int) DESCALE(outer - odd, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: 6-point IDCT along each of the 3 workspace rows.
+   * cK stands for sqrt(2) * cos(K*pi/12).
+   */
+  for (r = 0; r < 3; r++) {
+    row = workspace + 6*r;
+    dst = output_buf[r] + output_col;
+
+    /* Even part (inputs 0, 4, 2) */
+
+    base = (INT32) row[0];
+    base <<= CONST_BITS;
+    t4 = (INT32) row[4];
+    t4 = MULTIPLY(t4, FIX(0.707106781));        /* c4 */
+    even1 = base - t4 - t4;
+    base = base + t4;
+    t2 = (INT32) row[2];
+    t2 = MULTIPLY(t2, FIX(1.224744871));        /* c2 */
+    even0 = base + t2;
+    even2 = base - t2;
+
+    /* Odd part (inputs 1, 3, 5) */
+
+    w1 = (INT32) row[1];
+    w3 = (INT32) row[3];
+    w5 = (INT32) row[5];
+    rot = MULTIPLY(w1 + w5, FIX(0.366025404));  /* c5 */
+    w1 <<= CONST_BITS;
+    w3 <<= CONST_BITS;
+    w5 <<= CONST_BITS;
+    odd0 = rot + w1 + w3;
+    odd2 = rot - w3 + w5;
+    odd1 = w1 - w3 - w5;
+
+    /* Samples k and 5-k are the sum and difference of even/odd term k */
+
+    dst[0] = limit[(int) DESCALE(even0 + odd0, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+    dst[1] = limit[(int) DESCALE(even1 + odd1, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+    dst[2] = limit[(int) DESCALE(even2 + odd2, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+    dst[3] = limit[(int) DESCALE(even2 - odd2, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+    dst[4] = limit[(int) DESCALE(even1 - odd1, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+    dst[5] = limit[(int) DESCALE(even0 - odd0, CONST_BITS+PASS1_BITS+3)
+                   & RANGE_MASK];
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and apply the inverse DCT to it,
+ * yielding an output block 4 samples wide and 2 rows high.
+ *
+ * Columns go through a 2-point IDCT (pass 1), rows through a 4-point
+ * IDCT (pass 2).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 workspace[4*2];         /* pass 1 results: 2 rows of 4 values */
+  INT32 * row;
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  JSAMPROW dst;
+  INT32 top, bottom;            /* dequantized column inputs */
+  INT32 w0, w1, w2, w3;         /* workspace row inputs */
+  INT32 even_sum, even_diff;
+  INT32 rot, rot_lo, rot_hi;
+  int col, r;
+  SHIFT_TEMPS
+
+  /* Pass 1: 2-point IDCT down each of the 4 columns.  No scaling is
+   * applied here, so the workspace holds plain sums and differences.
+   */
+  for (col = 0; col < 4; col++) {
+    top = DEQUANTIZE(coef_block[DCTSIZE*0 + col], qtbl[DCTSIZE*0 + col]);
+    bottom = DEQUANTIZE(coef_block[DCTSIZE*1 + col], qtbl[DCTSIZE*1 + col]);
+
+    workspace[4*0 + col] = top + bottom;
+    workspace[4*1 + col] = top - bottom;
+  }
+
+  /* Pass 2: 4-point IDCT along each of the 2 workspace rows.
+   * cK stands for sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+  for (r = 0; r < 2; r++) {
+    row = workspace + 4*r;
+    dst = output_buf[r] + output_col;
+
+    /* Even part (inputs 0 and 2) */
+
+    w0 = row[0];
+    w2 = row[2];
+
+    even_sum = (w0 + w2) << CONST_BITS;
+    even_diff = (w0 - w2) << CONST_BITS;
+
+    /* Odd part (inputs 1 and 3): the rotation used by the even part
+     * of the 8x8 LL&M IDCT.
+     */
+
+    w1 = row[1];
+    w3 = row[3];
+
+    rot = MULTIPLY(w1 + w3, FIX_0_541196100);       /* c6 */
+    rot_lo = rot + MULTIPLY(w3, - FIX_1_847759065); /* -(c2+c6) */
+    rot_hi = rot + MULTIPLY(w1, FIX_0_765366865);   /* c2-c6 */
+
+    /* Descale (pass 1 added no PASS1_BITS) and range-limit, left to right */
+
+    dst[0] = limit[(int) DESCALE(even_sum + rot_hi, CONST_BITS+3)
+                   & RANGE_MASK];
+    dst[1] = limit[(int) DESCALE(even_diff + rot_lo, CONST_BITS+3)
+                   & RANGE_MASK];
+    dst[2] = limit[(int) DESCALE(even_diff - rot_lo, CONST_BITS+3)
+                   & RANGE_MASK];
+    dst[3] = limit[(int) DESCALE(even_sum - rot_hi, CONST_BITS+3)
+                   & RANGE_MASK];
+  }
+}
+
+
+/*
+ * Dequantize one coefficient block and apply the inverse DCT to it,
+ * yielding an output block 2 samples wide and 1 row high.
+ *
+ * The column pass is a 1-point IDCT and therefore a no-op; only the
+ * 2-point row transform is carried out.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  JSAMPROW dst = output_buf[0] + output_col;
+  INT32 dc_term, ac_term;       /* dequantized coefficients 0 and 1 */
+  INT32 left, right;            /* unscaled output samples */
+  SHIFT_TEMPS
+
+  /* Only the first two coefficients of the block contribute. */
+
+  dc_term = DEQUANTIZE(coef_block[0], qtbl[0]);
+  ac_term = DEQUANTIZE(coef_block[1], qtbl[1]);
+
+  /* 2-point butterfly */
+
+  left = dc_term + ac_term;
+  right = dc_term - ac_term;
+
+  /* Descale by 2**3 and range-limit */
+
+  dst[0] = limit[(int) DESCALE(left, 3) & RANGE_MASK];
+  dst[1] = limit[(int) DESCALE(right, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 8x16 output block (8 samples wide, 16 rows high).
+ *
+ * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ *
+ * Pass 1 reads coefficient rows 0..7 of all 8 columns and dequantizes
+ * them with compptr->dct_table; pass 2 reads only the workspace and
+ * writes 8 samples into each of output rows 0..15, starting at
+ * output_col.  cinfo is used solely to obtain the range-limit table.
+ */
+
+GLOBAL(void)
+jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+ INT32 z1, z2, z3, z4, z5;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[8*16]; /* buffers data between passes: 16 rows of 8 values */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+ * Each of the 8 columns yields 16 values, stored with a row stride of 8.
+ * The descale here is by CONST_BITS-PASS1_BITS only, so the stored values
+ * keep PASS1_BITS extra bits that pass 2 removes.
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: input rows 0, 4, 2, 6 are combined into tmp20..tmp27 */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+ tmp12 = tmp0 + tmp2;
+ tmp13 = tmp0 - tmp2;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+ z3 = z1 - z2;
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
+
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+ tmp20 = tmp10 + tmp0;
+ tmp27 = tmp10 - tmp0;
+ tmp21 = tmp12 + tmp1;
+ tmp26 = tmp12 - tmp1;
+ tmp22 = tmp13 + tmp2;
+ tmp25 = tmp13 - tmp2;
+ tmp23 = tmp11 + tmp3;
+ tmp24 = tmp11 - tmp3;
+
+ /* Odd part: input rows 1, 3, 5, 7 are combined into tmp0..tmp3 and
+ * tmp10..tmp13.  The temporaries are reused and updated incrementally,
+ * so the statement order below matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp11 = z1 + z3;
+
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
+ tmp0 = tmp1 + tmp2 + tmp3 -
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
+ tmp13 = tmp10 + tmp11 + tmp12 -
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
+ z2 += z4;
+ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
+ tmp1 += z1;
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
+ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
+ tmp12 += z2;
+ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+ tmp2 += z2;
+ tmp3 += z2;
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
+ tmp10 += z2;
+ tmp11 += z2;
+
+ /* Final output stage: workspace rows k and 15-k receive the sum and
+ * difference of the k-th even term and the k-th odd term.
+ */
+
+ wsptr[8*0] = (int) DESCALE(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*15] = (int) DESCALE(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[8*1] = (int) DESCALE(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*14] = (int) DESCALE(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[8*2] = (int) DESCALE(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*13] = (int) DESCALE(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[8*3] = (int) DESCALE(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*12] = (int) DESCALE(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[8*4] = (int) DESCALE(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*11] = (int) DESCALE(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[8*5] = (int) DESCALE(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*10] = (int) DESCALE(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[8*6] = (int) DESCALE(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*9] = (int) DESCALE(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[8*7] = (int) DESCALE(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[8*8] = (int) DESCALE(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process rows from work array, store into output array. */
+ /* Note that we must descale the results by a factor of 8 == 2**3, */
+ /* and also undo the PASS1_BITS scaling.  All 16 workspace rows are processed, 8 samples each. */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 16; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: reverse the even part of the forward DCT. */
+ /* The rotator is sqrt(2)*c(-6). */
+
+ z2 = (INT32) wsptr[2];
+ z3 = (INT32) wsptr[6];
+
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+ tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
+ tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ /* Odd part per figure 8; the matrix is unitary and hence its
+ * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
+ * From here on tmp0..tmp3 hold the odd-part terms for inputs 7, 5, 3, 1.
+ */
+
+ tmp0 = (INT32) wsptr[7];
+ tmp1 = (INT32) wsptr[5];
+ tmp2 = (INT32) wsptr[3];
+ tmp3 = (INT32) wsptr[1];
+
+ z1 = tmp0 + tmp3;
+ z2 = tmp1 + tmp2;
+ z3 = tmp0 + tmp2;
+ z4 = tmp1 + tmp3;
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+ z3 += z5;
+ z4 += z5;
+
+ tmp0 += z1 + z3;
+ tmp1 += z2 + z4;
+ tmp2 += z2 + z3;
+ tmp3 += z1 + z4;
+
+ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += DCTSIZE; /* advance pointer to next row (pass 1 used a stride of 8, so this relies on DCTSIZE == 8) */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x14 output block (7 samples wide, 14 rows high).
+ *
+ * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
+ *
+ * Pass 1 reads coefficient rows 0..7 of the first 7 columns, dequantizes
+ * them with compptr->dct_table and stores 14 rows of 7 intermediate
+ * values in the workspace.  Pass 2 takes ALL of its inputs, even and odd,
+ * from the workspace and writes 7 samples into each of output rows
+ * 0..13, starting at output_col.  cinfo is used solely to obtain the
+ * range-limit table.
+ */
+
+GLOBAL(void)
+jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block,
+                JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*14];  /* buffers data between passes: 14 rows of 7 values */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   * Results are descaled by CONST_BITS-PASS1_BITS only, so they keep
+   * PASS1_BITS extra bits that pass 2 removes.
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: input rows 0, 4, 2, 6 -> tmp20..tmp26 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    /* The middle term is descaled right away; it is later combined with
+     * tmp13, which is scaled by PASS1_BITS only.
+     */
+    tmp23 = DESCALE(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+                    CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: input rows 1, 3, 5, 7 -> tmp10..tmp16.  Temporaries are
+     * updated incrementally, so the statement order matters.
+     */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1 -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1 += z4;
+    z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13;    /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4 = MULTIPLY(z3 - z2, FIX(1.405321284));              /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage: workspace rows k and 13-k receive the sum and
+     * difference of the k-th even and odd terms.  Rows 3 and 10 use
+     * operands that are already at PASS1_BITS scale, hence no DESCALE.
+     */
+
+    wsptr[7*0]  = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*13] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*1]  = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*12] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*2]  = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*11] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*3]  = (int) (tmp23 + tmp13);
+    wsptr[7*10] = (int) (tmp23 - tmp13);
+    wsptr[7*4]  = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*9]  = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*5]  = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*8]  = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*6]  = (int) DESCALE(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[7*7]  = (int) DESCALE(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace inputs 0, 2, 4, 6 -> tmp20..tmp23 */
+
+    tmp23 = (INT32) wsptr[0];
+    tmp23 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part: workspace inputs 1, 3, 5 -> tmp10..tmp12.
+     * These must come from the current workspace row, like the even
+     * inputs above.  inptr and quantptr were advanced past the processed
+     * columns by pass 1 and do not change per row, so they must not be
+     * read here.
+     */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage: samples k and 6-k are the sum and difference
+     * of even/odd term k; the centre sample has no odd contribution.
+     */
+
+    outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE(tmp20 - tmp10,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE(tmp21 - tmp11,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE(tmp22 - tmp12,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp23,
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 7;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 6x12 output block (6 samples wide, 12 rows high).
+ *
+ * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ *
+ * Pass 1 reads coefficient rows 0..7 of the first 6 columns and
+ * dequantizes them with compptr->dct_table; pass 2 reads only the
+ * workspace and writes 6 samples into each of output rows 0..11,
+ * starting at output_col.  cinfo is used solely to obtain the
+ * range-limit table.
+ */
+
+GLOBAL(void)
+jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[6*12]; /* buffers data between passes: 12 rows of 6 values */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+ * Each of the 6 columns yields 12 values, stored with a row stride of 6.
+ * The descale here is by CONST_BITS-PASS1_BITS only, so the stored values
+ * keep PASS1_BITS extra bits that pass 2 removes.
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part: input rows 0, 4, 2, 6 are combined into tmp20..tmp25 */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp1 = MULTIPLY(z1, FIX(1.224744871)); /* c4 */
+
+ tmp10 = tmp0 + tmp1;
+ tmp11 = tmp0 - tmp1;
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp1 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+ z1 <<= CONST_BITS;
+ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+ z2 <<= CONST_BITS;
+
+ tmp12 = z1 - z2; /* tmp12 is a scratch value, recomputed three times below */
+
+ tmp21 = tmp0 + tmp12;
+ tmp24 = tmp0 - tmp12;
+
+ tmp12 = tmp1 + z2;
+
+ tmp20 = tmp10 + tmp12;
+ tmp25 = tmp10 - tmp12;
+
+ tmp12 = tmp1 - z1 - z2;
+
+ tmp22 = tmp11 + tmp12;
+ tmp23 = tmp11 - tmp12;
+
+ /* Odd part: input rows 1, 3, 5, 7 are combined into tmp10..tmp15.
+ * tmp0 and tmp1 are reused here for the two products of row 3, and the
+ * other temporaries are updated incrementally, so statement order matters.
+ */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp0 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
+ tmp1 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
+
+ tmp10 = z1 - z4;
+ z2 -= z3;
+ tmp12 = MULTIPLY(tmp10 + z2, FIX_0_541196100); /* c9 */
+ tmp11 = tmp12 + MULTIPLY(tmp10, FIX_0_765366865); /* c3-c9 */
+ tmp14 = tmp12 + MULTIPLY(z2, - FIX_1_847759065); /* c3+c9 */
+
+ z2 = z1 + z3;
+ tmp15 = MULTIPLY(z2 + z4, FIX(0.860918669)); /* c7 */
+ tmp12 = tmp15 + MULTIPLY(z2, FIX(0.261052384)); /* c5-c7 */
+ tmp10 = tmp12 + tmp0 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
+ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
+ tmp12 += tmp13 + tmp1 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+ tmp13 += tmp15 - tmp0 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+ tmp15 += tmp1 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
+
+ /* Final output stage: workspace rows k and 11-k receive the sum and
+ * difference of the k-th even term and the k-th odd term.
+ */
+
+ wsptr[6*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[6*11] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[6*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[6*10] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[6*2] = (int) DESCALE(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[6*9] = (int) DESCALE(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+ wsptr[6*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[6*8] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[6*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[6*7] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[6*5] = (int) DESCALE(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+ wsptr[6*6] = (int) DESCALE(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 12 rows from work array, store into output array.
+ * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ * The final descale by CONST_BITS+PASS1_BITS+3 removes the constant
+ * scaling, the PASS1_BITS kept by pass 1, and a further factor of 2**3.
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 12; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part: workspace inputs 0, 4, 2 are combined into tmp20..tmp22 */
+
+ tmp10 = (INT32) wsptr[0];
+ tmp10 <<= CONST_BITS;
+ tmp12 = (INT32) wsptr[4];
+ tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
+ tmp11 = tmp10 + tmp20;
+ tmp21 = tmp10 - tmp20 - tmp20;
+ tmp20 = (INT32) wsptr[2];
+ tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
+ tmp20 = tmp11 + tmp10;
+ tmp22 = tmp11 - tmp10;
+
+ /* Odd part: workspace inputs 1, 3, 5 are combined into tmp10..tmp12 */
+
+ z1 = (INT32) wsptr[1];
+ z2 = (INT32) wsptr[3];
+ z3 = (INT32) wsptr[5];
+ tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+ z1 <<= CONST_BITS;
+ z2 <<= CONST_BITS;
+ z3 <<= CONST_BITS;
+ tmp10 = tmp11 + z1 + z2;
+ tmp12 = tmp11 - z2 + z3;
+ tmp11 = z1 - z2 - z3;
+
+ /* Final output stage: samples k and 5-k are the sum and difference of
+ * even term tmp2k and odd term tmp1k.
+ */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp20 + tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[5] = range_limit[(int) DESCALE(tmp20 - tmp10,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp21 + tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp21 - tmp11,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp22 + tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp22 - tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 6; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 5x10 output block.
+ *
+ * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
+ *
+ * Only the first 5 columns of coef_block are read (rows DCTSIZE*0 through
+ * DCTSIZE*7 of each); each coefficient is dequantized against the entry at
+ * the same position in compptr->dct_table.  Output row r (0..9) is written
+ * to output_buf[r], 5 samples starting at offset output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1;
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+ INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+ INT32 z1, z2, z3, z4;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[5*10]; /* buffers data between passes: 10 rows of 5 */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+ * Each input column fills the same column of the work array (row
+ * stride 5).  Terms are accumulated at a scale of 2**CONST_BITS (see the
+ * shifts below; FIX() is assumed to scale its constant the same way) and
+ * descaled by CONST_BITS-PASS1_BITS, so the stored results remain scaled
+ * up by 2**PASS1_BITS.
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ z1 = MULTIPLY(tmp1, FIX(1.144122806)); /* c4 */
+ z2 = MULTIPLY(tmp1, FIX(0.437016024)); /* c8 */
+ tmp10 = tmp0 + z1;
+ tmp11 = tmp0 - z2;
+
+ tmp22 = DESCALE(tmp0 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
+ CONST_BITS-PASS1_BITS);
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+ tmp20 = tmp10 + tmp0;
+ tmp24 = tmp10 - tmp0;
+ tmp21 = tmp11 + tmp1;
+ tmp23 = tmp11 - tmp1;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+ tmp0 = z2 + z4;
+ tmp1 = z2 - z4;
+
+ tmp12 = MULTIPLY(tmp1, FIX(0.309016994)); /* (c3-c7)/2 */
+ tmp13 = z3 << CONST_BITS;
+
+ z2 = MULTIPLY(tmp0, FIX(0.951056516)); /* (c3+c7)/2 */
+ z4 = tmp13 + tmp12;
+
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+ z2 = MULTIPLY(tmp0, FIX(0.587785252)); /* (c1-c9)/2 */
+ z4 = tmp13 - tmp12 - (tmp1 << (CONST_BITS - 1));
+
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+ tmp12 = (z1 - tmp1 - z3) << PASS1_BITS; /* weights are +-c5 = +-1: no multiply */
+
+ /* Final output stage */
+
+ wsptr[5*0] = (int) DESCALE(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[5*9] = (int) DESCALE(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+ wsptr[5*1] = (int) DESCALE(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[5*8] = (int) DESCALE(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+ wsptr[5*2] = (int) (tmp22 + tmp12); /* both terms already at output scale */
+ wsptr[5*7] = (int) (tmp22 - tmp12); /* both terms already at output scale */
+ wsptr[5*3] = (int) DESCALE(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[5*6] = (int) DESCALE(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+ wsptr[5*4] = (int) DESCALE(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+ wsptr[5*5] = (int) DESCALE(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 10 rows from work array, store into output array.
+ * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+ * The final shift by CONST_BITS+PASS1_BITS+3 removes the CONST_BITS
+ * scaling of this pass, the PASS1_BITS kept from pass 1, and 3 further
+ * bits; each result is masked with RANGE_MASK and used as an index into
+ * range_limit to obtain the output sample.
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 10; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp12 = (INT32) wsptr[0];
+ tmp12 <<= CONST_BITS;
+ tmp0 = (INT32) wsptr[2];
+ tmp1 = (INT32) wsptr[4];
+ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+ z3 = tmp12 + z2;
+ tmp10 = z3 + z1;
+ tmp11 = z3 - z1;
+ tmp12 -= z2 << 2; /* 4 * (c2-c4)/2 = sqrt(2): centre sample */
+
+ /* Odd part */
+
+ z2 = (INT32) wsptr[1];
+ z3 = (INT32) wsptr[3];
+
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
+
+ /* Final output stage */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[4] = range_limit[(int) DESCALE(tmp10 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp11 - tmp1,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 5; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x8 output block.
+ *
+ * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ *
+ * Only the first 4 columns of coef_block are read (rows DCTSIZE*0 through
+ * DCTSIZE*7 of each); each coefficient is dequantized against the entry at
+ * the same position in compptr->dct_table.  Output row r (0..7) is written
+ * to output_buf[r], 4 samples starting at offset output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp3;
+ INT32 tmp10, tmp11, tmp12, tmp13;
+ INT32 z1, z2, z3, z4, z5;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[4*8]; /* buffers data between passes: 8 rows of 4 */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array. */
+ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 4; ctr > 0; ctr--) { /* one iteration per input column */
+ /* Due to quantization, we will usually find that many of the input
+ * coefficients are zero, especially the AC terms. We can exploit this
+ * by short-circuiting the IDCT calculation for any column in which all
+ * the AC terms are zero. In that case each output is equal to the
+ * DC coefficient (with scale factor as needed).
+ * With typical images and quantization tables, half or more of the
+ * column DCT calculations can be simplified this way.
+ */
+
+ if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+ inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+ inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+ inptr[DCTSIZE*7] == 0) {
+ /* AC terms all zero: replicate the scaled DC value down the column */
+ int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+ wsptr[4*0] = dcval;
+ wsptr[4*1] = dcval;
+ wsptr[4*2] = dcval;
+ wsptr[4*3] = dcval;
+ wsptr[4*4] = dcval;
+ wsptr[4*5] = dcval;
+ wsptr[4*6] = dcval;
+ wsptr[4*7] = dcval;
+
+ inptr++; /* advance pointers to next column */
+ quantptr++;
+ wsptr++;
+ continue;
+ }
+
+ /* Even part: reverse the even part of the forward DCT. */
+ /* The rotator is sqrt(2)*c(-6). */
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+ tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+
+ tmp0 = (z2 + z3) << CONST_BITS;
+ tmp1 = (z2 - z3) << CONST_BITS;
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ /* Odd part per figure 8; the matrix is unitary and hence its
+ * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
+ */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+ z1 = tmp0 + tmp3;
+ z2 = tmp1 + tmp2;
+ z3 = tmp0 + tmp2;
+ z4 = tmp1 + tmp3;
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+ z3 += z5;
+ z4 += z5;
+
+ tmp0 += z1 + z3;
+ tmp1 += z2 + z4;
+ tmp2 += z2 + z3;
+ tmp3 += z1 + z4;
+
+ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+ wsptr[4*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[4*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+ wsptr[4*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[4*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[4*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[4*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+ wsptr[4*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[4*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+ inptr++; /* advance pointers to next column */
+ quantptr++;
+ wsptr++;
+ }
+
+ /* Pass 2: process 8 rows from work array, store into output array.
+ * 4-point IDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+ * The final shift by CONST_BITS+PASS1_BITS+3 removes the CONST_BITS
+ * scaling of this pass, the PASS1_BITS kept from pass 1, and 3 further
+ * bits; each result is masked with RANGE_MASK and used as an index into
+ * range_limit to obtain the output sample.
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 8; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp2 = (INT32) wsptr[2];
+
+ tmp10 = (tmp0 + tmp2) << CONST_BITS;
+ tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+ /* Odd part */
+ /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+ z2 = (INT32) wsptr[1];
+ z3 = (INT32) wsptr[3];
+
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
+ tmp0 = z1 + MULTIPLY(z3, - FIX_1_847759065); /* -(c2+c6) */
+ tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+
+ /* Final output stage */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 4; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x6 output block.
+ *
+ * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
+ *
+ * Only the first 3 columns of coef_block are read (rows DCTSIZE*0 through
+ * DCTSIZE*5 of each); each coefficient is dequantized against the entry at
+ * the same position in compptr->dct_table.  Output row r (0..5) is written
+ * to output_buf[r], 3 samples starting at offset output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+ INT32 z1, z2, z3;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ int * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ int workspace[3*6]; /* buffers data between passes: 6 rows of 3 */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+ * Each input column fills the same column of the work array (row
+ * stride 3).  Terms are accumulated at a scale of 2**CONST_BITS (see the
+ * shifts below; FIX() is assumed to scale its constant the same way) and
+ * descaled by CONST_BITS-PASS1_BITS, so the stored results remain scaled
+ * up by 2**PASS1_BITS.
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp0 <<= CONST_BITS;
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
+ tmp1 = tmp0 + tmp10;
+ tmp11 = DESCALE(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
+ tmp10 = tmp1 + tmp0;
+ tmp12 = tmp1 - tmp0;
+
+ /* Odd part */
+
+ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+ tmp1 = (z1 - z2 - z3) << PASS1_BITS; /* weights are +-c3 = +-1: no multiply */
+
+ /* Final output stage */
+
+ wsptr[3*0] = DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[3*5] = DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+ wsptr[3*1] = (int) (tmp11 + tmp1); /* both terms already at output scale */
+ wsptr[3*4] = (int) (tmp11 - tmp1); /* both terms already at output scale */
+ wsptr[3*2] = DESCALE(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+ wsptr[3*3] = DESCALE(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+ }
+
+ /* Pass 2: process 6 rows from work array, store into output array.
+ * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+ * The final shift by CONST_BITS+PASS1_BITS+3 removes the CONST_BITS
+ * scaling of this pass, the PASS1_BITS kept from pass 1, and 3 further
+ * bits; each result is masked with RANGE_MASK and used as an index into
+ * range_limit to obtain the output sample.
+ */
+ wsptr = workspace;
+ for (ctr = 0; ctr < 6; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp0 = (INT32) wsptr[0];
+ tmp0 <<= CONST_BITS;
+ tmp2 = (INT32) wsptr[2];
+ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+ tmp10 = tmp0 + tmp12;
+ tmp2 = tmp0 - tmp12 - tmp12; /* centre sample: DC - 2*c2*in2 */
+
+ /* Odd part */
+
+ tmp12 = (INT32) wsptr[1];
+ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+ /* Final output stage */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[2] = range_limit[(int) DESCALE(tmp10 - tmp0,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp2,
+ CONST_BITS+PASS1_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 3; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x4 output block.
+ *
+ * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ *
+ * Only the first 2 columns of coef_block are read (rows DCTSIZE*0 through
+ * DCTSIZE*3 of each); each coefficient is dequantized against the entry at
+ * the same position in compptr->dct_table.  Output row r (0..3) is written
+ * to output_buf[r], 2 samples starting at offset output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp2, tmp10, tmp12;
+ INT32 z1, z2, z3;
+ JCOEFPTR inptr;
+ ISLOW_MULT_TYPE * quantptr;
+ INT32 * wsptr;
+ JSAMPROW outptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ int ctr;
+ INT32 workspace[2*4]; /* buffers data between passes: 4 rows of 2 */
+ SHIFT_TEMPS
+
+ /* Pass 1: process columns from input, store into work array.
+ * 4-point IDCT kernel,
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+ * Unlike the larger kernels, the results are stored without any DESCALE,
+ * i.e. still scaled up by 2**CONST_BITS; this is presumably why the work
+ * array holds INT32 rather than int.
+ */
+ inptr = coef_block;
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+ wsptr = workspace;
+ for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
+ /* Even part */
+
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+ tmp10 = (tmp0 + tmp2) << CONST_BITS;
+ tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+ /* Odd part */
+ /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+ z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+ z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
+ tmp0 = z1 + MULTIPLY(z3, - FIX_1_847759065); /* -(c2+c6) */
+ tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+
+ /* Final output stage */
+
+ wsptr[2*0] = tmp10 + tmp2;
+ wsptr[2*3] = tmp10 - tmp2;
+ wsptr[2*1] = tmp12 + tmp0;
+ wsptr[2*2] = tmp12 - tmp0;
+ }
+
+ /* Pass 2: process 4 rows from work array, store into output array.
+ * The 2-point kernel is just the sum and difference of the two row
+ * entries.  The shift by CONST_BITS+3 removes the scaling left by pass 1
+ * plus 3 further bits; each result is masked with RANGE_MASK and used as
+ * an index into range_limit to obtain the output sample.
+ */
+
+ wsptr = workspace;
+ for (ctr = 0; ctr < 4; ctr++) {
+ outptr = output_buf[ctr] + output_col;
+
+ /* Even part */
+
+ tmp10 = wsptr[0];
+
+ /* Odd part */
+
+ tmp0 = wsptr[1];
+
+ /* Final output stage */
+
+ outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, CONST_BITS+3)
+ & RANGE_MASK];
+ outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, CONST_BITS+3)
+ & RANGE_MASK];
+
+ wsptr += 2; /* advance pointer to next row */
+ }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 1x2 output block.
+ *
+ * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
+ *
+ * Only coef_block[DCTSIZE*0] and coef_block[DCTSIZE*1] are read, each
+ * dequantized against the entry at the same position in
+ * compptr->dct_table.  The two samples are written to
+ * output_buf[0][output_col] and output_buf[1][output_col].
+ */
+
+GLOBAL(void)
+jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+ INT32 tmp0, tmp10;
+ ISLOW_MULT_TYPE * quantptr;
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+ SHIFT_TEMPS
+
+ /* Process 1 column from input, store into output array.
+ * No work array is needed: the 1-point row pass leaves values unchanged.
+ */
+
+ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+ /* Even part */
+
+ tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+ /* Odd part */
+
+ tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+ /* Final output stage: sum and difference of the two terms, descaled by
+ * 3 bits, masked with RANGE_MASK and mapped through range_limit.
+ */
+
+ output_buf[0][output_col] = range_limit[(int) DESCALE(tmp10 + tmp0, 3)
+ & RANGE_MASK];
+ output_buf[1][output_col] = range_limit[(int) DESCALE(tmp10 - tmp0, 3)
+ & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/jmorecfg.h b/jmorecfg.h
index 54a7d1c..c88ab1f 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -266,9 +266,10 @@ typedef int boolean;
/* Encoder capability options: */
-#undef C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
+#define C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
#define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
+#define DCT_SCALING_SUPPORTED /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
#define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */
/* Note: if you selected 12-bit data precision, it is dangerous to turn off
* ENTROPY_OPT_SUPPORTED. The standard Huffman tables are only good for 8-bit
@@ -282,12 +283,12 @@ typedef int boolean;
/* Decoder capability options: */
-#undef D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
+#define D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
+#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */
#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */
#define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */
-#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */
#undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */
#define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */
#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */
diff --git a/jpegint.h b/jpegint.h
index 95b00d4..3378133 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -345,6 +345,7 @@ EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
/* Decompression module initialization routines */
EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
@@ -358,6 +359,7 @@ EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
diff --git a/jpeglib.h b/jpeglib.h
index d1be8dd..0b0684c 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -144,7 +144,8 @@ typedef struct {
* Values of 1,2,4,8 are likely to be supported. Note that different
* components may receive different IDCT scalings.
*/
- int DCT_scaled_size;
+ int DCT_h_scaled_size;
+ int DCT_v_scaled_size;
/* The downsampled dimensions are the component's actual, unpadded number
* of samples at the main buffer (preprocessing/compression interface), thus
* downsampled_width = ceil(image_width * Hi/Hmax)
@@ -291,6 +292,17 @@ struct jpeg_compress_struct {
* helper routines to simplify changing parameters.
*/
+ unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+ JDIMENSION jpeg_width; /* scaled JPEG image width */
+ JDIMENSION jpeg_height; /* scaled JPEG image height */
+ /* Dimensions of actual JPEG image that will be written to file,
+ * derived from input dimensions by scaling factors above.
+ * These fields are computed by jpeg_start_compress().
+ * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+ * in advance of calling jpeg_start_compress().
+ */
+
int data_precision; /* bits of precision in image data */
int num_components; /* # of color components in JPEG image */
@@ -298,14 +310,17 @@ struct jpeg_compress_struct {
jpeg_component_info * comp_info;
/* comp_info[i] describes component that appears i'th in SOF */
-
+
JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
- /* ptrs to coefficient quantization tables, or NULL if not defined */
-
+ int q_scale_factor[NUM_QUANT_TBLS];
+ /* ptrs to coefficient quantization tables, or NULL if not defined,
+ * and corresponding scale factors (percentage, initialized 100).
+ */
+
JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
/* ptrs to Huffman coding tables, or NULL if not defined */
-
+
UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
@@ -364,6 +379,9 @@ struct jpeg_compress_struct {
int max_h_samp_factor; /* largest h_samp_factor */
int max_v_samp_factor; /* largest v_samp_factor */
+ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
+ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
+
JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */
/* The coefficient controller receives data in units of MCU rows as defined
* for fully interleaved scans (whether the JPEG file is interleaved or not).
@@ -575,7 +593,8 @@ struct jpeg_decompress_struct {
int max_h_samp_factor; /* largest h_samp_factor */
int max_v_samp_factor; /* largest v_samp_factor */
- int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */
+ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
+ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */
/* The coefficient controller's input and output progress is measured in
@@ -841,6 +860,7 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_default_colorspace jDefColorspace
#define jpeg_set_quality jSetQuality
#define jpeg_set_linear_quality jSetLQuality
+#define jpeg_default_qtables jDefQTables
#define jpeg_add_quant_table jAddQuantTable
#define jpeg_quality_scaling jQualityScaling
#define jpeg_simple_progression jSimProgress
@@ -850,6 +870,7 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_start_compress jStrtCompress
#define jpeg_write_scanlines jWrtScanlines
#define jpeg_finish_compress jFinCompress
+#define jpeg_calc_jpeg_dimensions jCjpegDimensions
#define jpeg_write_raw_data jWrtRawData
#define jpeg_write_marker jWrtMarker
#define jpeg_write_m_header jWrtMHeader
@@ -921,6 +942,8 @@ EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
int scale_factor,
boolean force_baseline));
+EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
+ boolean force_baseline));
EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
const unsigned int *basic_table,
int scale_factor,
@@ -940,6 +963,9 @@ EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
JDIMENSION num_lines));
EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
+
/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
JSAMPIMAGE data,
diff --git a/jpegtran.c b/jpegtran.c
index 20ef111..44c061a 100644
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -1,7 +1,7 @@
/*
* jpegtran.c
*
- * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Copyright (C) 1995-2001, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -64,8 +64,10 @@ usage (void)
#endif
#if TRANSFORMS_SUPPORTED
fprintf(stderr, "Switches for modifying the image:\n");
+ fprintf(stderr, " -crop WxH+X+Y Crop to a rectangular subarea\n");
fprintf(stderr, " -grayscale Reduce to grayscale (omit color data)\n");
fprintf(stderr, " -flip [horizontal|vertical] Mirror image (left-right or top-bottom)\n");
+ fprintf(stderr, " -perfect Fail if there is non-transformable edge blocks\n");
fprintf(stderr, " -rotate [90|180|270] Rotate image (degrees clockwise)\n");
fprintf(stderr, " -transpose Transpose image\n");
fprintf(stderr, " -transverse Transverse transpose image\n");
@@ -133,7 +135,9 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
copyoption = JCOPYOPT_DEFAULT;
transformoption.transform = JXFORM_NONE;
transformoption.trim = FALSE;
+ transformoption.perfect = FALSE;
transformoption.force_grayscale = FALSE;
+ transformoption.crop = FALSE;
cinfo->err->trace_level = 0;
/* Scan command line options, adjust parameters */
@@ -160,7 +164,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
exit(EXIT_FAILURE);
#endif
- } else if (keymatch(arg, "copy", 1)) {
+ } else if (keymatch(arg, "copy", 2)) {
/* Select which extra markers to copy. */
if (++argn >= argc) /* advance to next argument */
usage();
@@ -173,6 +177,20 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
} else
usage();
+ } else if (keymatch(arg, "crop", 2)) {
+ /* Perform lossless cropping. */
+#if TRANSFORMS_SUPPORTED
+ if (++argn >= argc) /* advance to next argument */
+ usage();
+ if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+ fprintf(stderr, "%s: bogus -crop argument '%s'\n",
+ progname, argv[argn]);
+ exit(EXIT_FAILURE);
+ }
+#else
+ select_transform(JXFORM_NONE); /* force an error */
+#endif
+
} else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
/* Enable debug printouts. */
/* On first -d, print version identification */
@@ -233,7 +251,12 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
usage();
outfilename = argv[argn]; /* save it away for later use */
- } else if (keymatch(arg, "progressive", 1)) {
+ } else if (keymatch(arg, "perfect", 2)) {
+ /* Fail if there are any partial edge MCUs that the transform can't
+ * handle. */
+ transformoption.perfect = TRUE;
+
+ } else if (keymatch(arg, "progressive", 2)) {
/* Select simple progressive mode. */
#ifdef C_PROGRESSIVE_SUPPORTED
simple_progressive = TRUE;
@@ -342,8 +365,10 @@ main (int argc, char **argv)
jvirt_barray_ptr * src_coef_arrays;
jvirt_barray_ptr * dst_coef_arrays;
int file_index;
- FILE * input_file;
- FILE * output_file;
+ /* We assume all-in-memory processing and can therefore use only a
+ * single file pointer for sequential input and output operation.
+ */
+ FILE * fp;
/* On Mac, fetch a command line. */
#ifdef USE_CCOMMAND
@@ -406,24 +431,13 @@ main (int argc, char **argv)
/* Open the input file. */
if (file_index < argc) {
- if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
- fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
+ if ((fp = fopen(argv[file_index], READ_BINARY)) == NULL) {
+ fprintf(stderr, "%s: can't open %s for reading\n", progname, argv[file_index]);
exit(EXIT_FAILURE);
}
} else {
/* default input file is stdin */
- input_file = read_stdin();
- }
-
- /* Open the output file. */
- if (outfilename != NULL) {
- if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
- fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
- exit(EXIT_FAILURE);
- }
- } else {
- /* default output file is stdout */
- output_file = write_stdout();
+ fp = read_stdin();
}
#ifdef PROGRESS_REPORT
@@ -431,7 +445,7 @@ main (int argc, char **argv)
#endif
/* Specify data source for decompression */
- jpeg_stdio_src(&srcinfo, input_file);
+ jpeg_stdio_src(&srcinfo, fp);
/* Enable saving of extra markers that we want to copy */
jcopy_markers_setup(&srcinfo, copyoption);
@@ -443,6 +457,15 @@ main (int argc, char **argv)
* jpeg_read_coefficients so that memory allocation will be done right.
*/
#if TRANSFORMS_SUPPORTED
+ /* Fails right away if -perfect is given and transformation is not perfect.
+ */
+ if (transformoption.perfect &&
+ !jtransform_perfect_transform(srcinfo.image_width, srcinfo.image_height,
+ srcinfo.max_h_samp_factor * DCTSIZE, srcinfo.max_v_samp_factor * DCTSIZE,
+ transformoption.transform)) {
+ fprintf(stderr, "%s: transformation is not perfect\n", progname);
+ exit(EXIT_FAILURE);
+ }
jtransform_request_workspace(&srcinfo, &transformoption);
#endif
@@ -463,11 +486,32 @@ main (int argc, char **argv)
dst_coef_arrays = src_coef_arrays;
#endif
+ /* Close input file, if we opened it.
+ * Note: we assume that jpeg_read_coefficients consumed all input
+ * until JPEG_REACHED_EOI, and that jpeg_finish_decompress will
+ * only consume more while (! cinfo->inputctl->eoi_reached).
+ * We cannot call jpeg_finish_decompress here since we still need the
+ * virtual arrays allocated from the source object for processing.
+ */
+ if (fp != stdin)
+ fclose(fp);
+
+ /* Open the output file. */
+ if (outfilename != NULL) {
+ if ((fp = fopen(outfilename, WRITE_BINARY)) == NULL) {
+ fprintf(stderr, "%s: can't open %s for writing\n", progname, outfilename);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ /* default output file is stdout */
+ fp = write_stdout();
+ }
+
/* Adjust default compression parameters by re-parsing the options */
file_index = parse_switches(&dstinfo, argc, argv, 0, TRUE);
/* Specify data destination for compression */
- jpeg_stdio_dest(&dstinfo, output_file);
+ jpeg_stdio_dest(&dstinfo, fp);
/* Start compressor (note no image data is actually written here) */
jpeg_write_coefficients(&dstinfo, dst_coef_arrays);
@@ -488,11 +532,9 @@ main (int argc, char **argv)
(void) jpeg_finish_decompress(&srcinfo);
jpeg_destroy_decompress(&srcinfo);
- /* Close files, if we opened them */
- if (input_file != stdin)
- fclose(input_file);
- if (output_file != stdout)
- fclose(output_file);
+ /* Close output file, if we opened it */
+ if (fp != stdout)
+ fclose(fp);
#ifdef PROGRESS_REPORT
end_progress_monitor((j_common_ptr) &dstinfo);
diff --git a/makefile.cfg b/makefile.cfg
index f25e42e..9c7ac9d 100644
--- a/makefile.cfg
+++ b/makefile.cfg
@@ -79,8 +79,8 @@ LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
- jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
- jquant2.c jutils.c jmemmgr.c
+ jfdctint.c jidctflt.c jidctfst.c jidctint.c jquant1.c \
+ jquant2.c jutils.c jmemmgr.c jaricom.c jcarith.c jdarith.c
# memmgr back ends: compile only one of these into a working library
SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
@@ -110,19 +110,19 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
$(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
# library object files common to compression and decompression
-COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM)
+COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) jaricom.$(O) $(SYSDEPMEM)
# compression library object files
CLIBOBJECTS= jcapimin.$(O) jcapistd.$(O) jctrans.$(O) jcparam.$(O) \
jdatadst.$(O) jcinit.$(O) jcmaster.$(O) jcmarker.$(O) jcmainct.$(O) \
jcprepct.$(O) jccoefct.$(O) jccolor.$(O) jcsample.$(O) jchuff.$(O) \
jcphuff.$(O) jcdctmgr.$(O) jfdctfst.$(O) jfdctflt.$(O) \
- jfdctint.$(O)
+ jfdctint.$(O) jcarith.$(O)
# decompression library object files
DLIBOBJECTS= jdapimin.$(O) jdapistd.$(O) jdtrans.$(O) jdatasrc.$(O) \
jdmaster.$(O) jdinput.$(O) jdmarker.$(O) jdhuff.$(O) jdphuff.$(O) \
jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) \
- jidctfst.$(O) jidctflt.$(O) jidctint.$(O) jidctred.$(O) \
- jdsample.$(O) jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O)
+ jidctfst.$(O) jidctflt.$(O) jidctint.$(O) jdsample.$(O) \
+ jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O) jdarith.$(O)
# These objectfiles are included in libjpeg.a
LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
# object files for sample applications (excluding library files)
@@ -288,7 +288,6 @@ jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h je
jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jquant1.$(O): jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jquant2.$(O): jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jutils.$(O): jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -317,3 +316,6 @@ rdbmp.$(O): rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
wrbmp.$(O): wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdrle.$(O): rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrrle.$(O): wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+jcarith.$(O): jcarith.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdarith.$(O): jdarith.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jaricom.$(O): jaricom.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
diff --git a/rdswitch.c b/rdswitch.c
index 4f4bb4f..7a839af 100644
--- a/rdswitch.c
+++ b/rdswitch.c
@@ -9,6 +9,7 @@
* command-line switches. Switches processed here are:
* -qtables file Read quantization tables from text file
* -scans file Read scan script from text file
+ * -quality N[,N,...] Set quality ratings
* -qslots N[,N,...] Set component quantization table selectors
* -sample HxV[,HxV,...] Set component sampling factors
*/
@@ -70,8 +71,7 @@ read_text_integer (FILE * file, long * result, int * termchar)
GLOBAL(boolean)
-read_quant_tables (j_compress_ptr cinfo, char * filename,
- int scale_factor, boolean force_baseline)
+read_quant_tables (j_compress_ptr cinfo, char * filename, boolean force_baseline)
/* Read a set of quantization tables from the specified file.
* The file is plain ASCII text: decimal numbers with whitespace between.
* Comments preceded by '#' may be included in the file.
@@ -108,7 +108,8 @@ read_quant_tables (j_compress_ptr cinfo, char * filename,
}
table[i] = (unsigned int) val;
}
- jpeg_add_quant_table(cinfo, tblno, table, scale_factor, force_baseline);
+ jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno],
+ force_baseline);
tblno++;
}
@@ -263,6 +264,38 @@ bogus:
GLOBAL(boolean)
+set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline)
+/* Parse a comma-separated list of quality ratings,
+ *	N[,N,...]
+ * and store one scale factor per quantization-table slot.
+ * When the list is shorter than the number of slots, the last rating seen
+ * (or the default of 75 if the string is empty) fills the remaining slots.
+ * Returns FALSE on a syntax error; slots processed before the error have
+ * already been updated at that point.
+ */
+{
+  int rating = 75;		/* used until the first value is parsed */
+  int slot;
+  char sep;
+
+  for (slot = 0; slot < NUM_QUANT_TBLS; slot++) {
+    if (*arg != '\0') {
+      /* sscanf leaves sep untouched when the number ends the string,
+       * so preload it with the only separator we accept.
+       */
+      sep = ',';
+      if (sscanf(arg, "%d%c", &rating, &sep) < 1 || sep != ',')
+        return FALSE;
+      /* Step past this segment, including its trailing comma if present */
+      while (*arg != '\0') {
+        if (*arg++ == ',')
+          break;
+      }
+    }
+    /* Convert user 0-100 rating to percentage scaling.  Once the string is
+     * exhausted, this keeps replicating the last parsed rating.
+     */
+    cinfo->q_scale_factor[slot] = jpeg_quality_scaling(rating);
+  }
+  jpeg_default_qtables(cinfo, force_baseline);
+  return TRUE;
+}
+
+
+GLOBAL(boolean)
set_quant_slots (j_compress_ptr cinfo, char *arg)
/* Process a quantization-table-selectors parameter string, of the form
* N[,N,...]
diff --git a/transupp.c b/transupp.c
index e5ec564..ff0f275 100644
--- a/transupp.c
+++ b/transupp.c
@@ -1,7 +1,7 @@
/*
* transupp.c
*
- * Copyright (C) 1997, Thomas G. Lane.
+ * Copyright (C) 1997-2001, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -20,6 +20,7 @@
#include "jinclude.h"
#include "jpeglib.h"
#include "transupp.h" /* My own external interface */
+#include <ctype.h> /* to declare isdigit() */
#if TRANSFORMS_SUPPORTED
@@ -28,7 +29,8 @@
* Lossless image transformation routines. These routines work on DCT
* coefficient arrays and thus do not require any lossy decompression
* or recompression of the image.
- * Thanks to Guido Vollbeding for the initial design and code of this feature.
+ * Thanks to Guido Vollbeding for the initial design and code of this feature,
+ * and to Ben Jackson for introducing the cropping feature.
*
* Horizontal flipping is done in-place, using a single top-to-bottom
* pass through the virtual source array. It will thus be much the
@@ -42,6 +44,13 @@
* arrays for most of the transforms. That could result in much thrashing
* if the image is larger than main memory.
*
+ * If cropping or trimming is involved, the destination arrays may be smaller
+ * than the source arrays. Note it is not possible to do horizontal flip
+ * in-place when a nonzero Y crop offset is specified, since we'd have to move
+ * data from one block row to another but the virtual array manager doesn't
+ * guarantee we can touch more than one row at a time. So in that case,
+ * we have to use a separate destination array.
+ *
* Some notes about the operating environment of the individual transform
* routines:
* 1. Both the source and destination virtual arrays are allocated from the
@@ -54,20 +63,65 @@
* and we may as well take that as the effective iMCU size.
* 4. When "trim" is in effect, the destination's dimensions will be the
* trimmed values but the source's will be untrimmed.
- * 5. All the routines assume that the source and destination buffers are
+ * 5. When "crop" is in effect, the destination's dimensions will be the
+ * cropped values but the source's will be uncropped. Each transform
+ * routine is responsible for picking up source data starting at the
+ * correct X and Y offset for the crop region. (The X and Y offsets
+ * passed to the transform routines are measured in iMCU blocks of the
+ * destination.)
+ * 6. All the routines assume that the source and destination buffers are
* padded out to a full iMCU boundary. This is true, although for the
* source buffer it is an undocumented property of jdcoefct.c.
- * Notes 2,3,4 boil down to this: generally we should use the destination's
- * dimensions and ignore the source's.
*/
LOCAL(void)
-do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
- jvirt_barray_ptr *src_coef_arrays)
-/* Horizontal flip; done in-place, so no separate dest array is required */
+do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	 jvirt_barray_ptr *src_coef_arrays,
+	 jvirt_barray_ptr *dst_coef_arrays)
+/* Plain crop, with no rotation or flip combined with it.
+ * For every component, block rows are copied from the source array into
+ * the destination array, starting at the crop offsets in the source and
+ * covering the destination's width and height in blocks.
+ */
+{
+  JDIMENSION out_row, skip_cols, skip_rows;
+  int comp, row;
+  JBLOCKARRAY in_rows, out_rows;
+  jpeg_component_info *cp;
+
+  for (comp = 0; comp < dstinfo->num_components; comp++) {
+    cp = &dstinfo->comp_info[comp];
+    /* Scale the crop offsets by this component's sampling factors to get
+     * the number of source blocks to skip horizontally and vertically.
+     */
+    skip_cols = x_crop_offset * cp->h_samp_factor;
+    skip_rows = y_crop_offset * cp->v_samp_factor;
+    for (out_row = 0; out_row < cp->height_in_blocks;
+	 out_row += cp->v_samp_factor) {
+      /* Fetch v_samp_factor block rows of the destination, then the
+       * matching source rows displaced downward by the Y crop.
+       */
+      out_rows = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[comp], out_row,
+	 (JDIMENSION) cp->v_samp_factor, TRUE);
+      in_rows = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[comp],
+	 out_row + skip_rows,
+	 (JDIMENSION) cp->v_samp_factor, FALSE);
+      for (row = 0; row < cp->v_samp_factor; row++) {
+	/* Copy one full destination-width row, skipping the cropped columns */
+	jcopy_block_row(in_rows[row] + skip_cols,
+			out_rows[row],
+			cp->width_in_blocks);
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset,
+ jvirt_barray_ptr *src_coef_arrays)
+/* Horizontal flip; done in-place, so no separate dest array is required.
+ * NB: this only works when y_crop_offset is zero.
+ */
{
- JDIMENSION MCU_cols, comp_width, blk_x, blk_y;
+ JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
int ci, k, offset_y;
JBLOCKARRAY buffer;
JCOEFPTR ptr1, ptr2;
@@ -79,17 +133,19 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* mirroring by changing the signs of odd-numbered columns.
* Partial iMCUs at the right edge are left untouched.
*/
- MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+ MCU_cols = srcinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_width = MCU_cols * compptr->h_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
for (blk_y = 0; blk_y < compptr->height_in_blocks;
blk_y += compptr->v_samp_factor) {
buffer = (*srcinfo->mem->access_virt_barray)
((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
(JDIMENSION) compptr->v_samp_factor, TRUE);
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+ /* Do the mirroring */
for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
ptr1 = buffer[offset_y][blk_x];
ptr2 = buffer[offset_y][comp_width - blk_x - 1];
@@ -105,6 +161,79 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*ptr2++ = -temp1;
}
}
+ if (x_crop_blocks > 0) {
+ /* Now left-justify the portion of the data to be kept.
+ * We can't use a single jcopy_block_row() call because that routine
+ * depends on memcpy(), whose behavior is unspecified for overlapping
+ * source and destination areas. Sigh.
+ */
+ for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+ jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+ buffer[offset_y] + blk_x,
+ (JDIMENSION) 1);
+ }
+ }
+ }
+ }
+ }
+}
+
+
+LOCAL(void)
+do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+ jvirt_barray_ptr *src_coef_arrays,
+ jvirt_barray_ptr *dst_coef_arrays)
+/* Horizontal flip in general cropping case.
+ * Reads blocks from src_coef_arrays, starting at the given crop offsets,
+ * and writes the flipped result into the separate dst_coef_arrays.
+ * Blocks inside the whole-iMCU part of the source width are mirrored;
+ * blocks beyond it are copied unchanged.
+ */
+{
+ JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
+ int ci, k, offset_y;
+ JBLOCKARRAY src_buffer, dst_buffer;
+ JBLOCKROW src_row_ptr, dst_row_ptr;
+ JCOEFPTR src_ptr, dst_ptr;
+ jpeg_component_info *compptr;
+
+ /* Here we must output into a separate array because we can't touch
+ * different rows of a single virtual array simultaneously. Otherwise,
+ * this is essentially the same as the routine above.
+ */
+ /* Integer division: counts only the complete iMCU columns of the source. */
+ MCU_cols = srcinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+
+ for (ci = 0; ci < dstinfo->num_components; ci++) {
+ compptr = dstinfo->comp_info + ci;
+ /* Width, in blocks of this component, of the area that can be mirrored */
+ comp_width = MCU_cols * compptr->h_samp_factor;
+ /* Scale the crop offsets by this component's sampling factors */
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+ for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+ dst_blk_y += compptr->v_samp_factor) {
+ dst_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+ (JDIMENSION) compptr->v_samp_factor, TRUE);
+ /* Source rows are the same rows displaced downward by the Y crop */
+ src_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_y + y_crop_blocks,
+ (JDIMENSION) compptr->v_samp_factor, FALSE);
+ for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+ dst_row_ptr = dst_buffer[offset_y];
+ src_row_ptr = src_buffer[offset_y];
+ for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+ if (x_crop_blocks + dst_blk_x < comp_width) {
+ /* Do the mirrorable blocks */
+ dst_ptr = dst_row_ptr[dst_blk_x];
+ /* Source block is counted back from the right edge of the
+ * mirrorable area, shifted left by the X crop.
+ */
+ src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+ /* this unrolled loop doesn't need to know which row it's on... */
+ for (k = 0; k < DCTSIZE2; k += 2) {
+ *dst_ptr++ = *src_ptr++; /* copy even column */
+ *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
+ }
+ } else {
+ /* Copy last partial block(s) verbatim */
+ jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+ dst_row_ptr + dst_blk_x,
+ (JDIMENSION) 1);
+ }
+ }
}
}
}
@@ -113,11 +242,13 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* Vertical flip */
{
JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JBLOCKROW src_row_ptr, dst_row_ptr;
@@ -131,33 +262,38 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* of odd-numbered rows.
* Partial iMCUs at the bottom edge are copied verbatim.
*/
- MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+ MCU_rows = srcinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_height = MCU_rows * compptr->v_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
(JDIMENSION) compptr->v_samp_factor, TRUE);
- if (dst_blk_y < comp_height) {
+ if (y_crop_blocks + dst_blk_y < comp_height) {
/* Row is within the mirrorable area. */
src_buffer = (*srcinfo->mem->access_virt_barray)
((j_common_ptr) srcinfo, src_coef_arrays[ci],
- comp_height - dst_blk_y - (JDIMENSION) compptr->v_samp_factor,
+ comp_height - y_crop_blocks - dst_blk_y -
+ (JDIMENSION) compptr->v_samp_factor,
(JDIMENSION) compptr->v_samp_factor, FALSE);
} else {
/* Bottom-edge blocks will be copied verbatim. */
src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_y,
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_y + y_crop_blocks,
(JDIMENSION) compptr->v_samp_factor, FALSE);
}
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
- if (dst_blk_y < comp_height) {
+ if (y_crop_blocks + dst_blk_y < comp_height) {
/* Row is within the mirrorable area. */
dst_row_ptr = dst_buffer[offset_y];
src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+ src_row_ptr += x_crop_blocks;
for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
dst_blk_x++) {
dst_ptr = dst_row_ptr[dst_blk_x];
@@ -173,7 +309,8 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
} else {
/* Just copy row verbatim. */
- jcopy_block_row(src_buffer[offset_y], dst_buffer[offset_y],
+ jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+ dst_buffer[offset_y],
compptr->width_in_blocks);
}
}
@@ -184,11 +321,12 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* Transpose source into destination */
{
- JDIMENSION dst_blk_x, dst_blk_y;
+ JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_x, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JCOEFPTR src_ptr, dst_ptr;
@@ -201,6 +339,8 @@ do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*/
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
@@ -210,11 +350,12 @@ do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
dst_blk_x += compptr->h_samp_factor) {
src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_x + x_crop_blocks,
(JDIMENSION) compptr->h_samp_factor, FALSE);
for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
- src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++)
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -228,6 +369,7 @@ do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* 90 degree rotation is equivalent to
@@ -237,6 +379,7 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*/
{
JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_x, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JCOEFPTR src_ptr, dst_ptr;
@@ -246,11 +389,13 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* at the (output) right edge properly. They just get transposed and
* not mirrored.
*/
- MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+ MCU_cols = srcinfo->image_height / (dstinfo->max_h_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_width = MCU_cols * compptr->h_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
@@ -259,15 +404,26 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
dst_blk_x += compptr->h_samp_factor) {
- src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
- (JDIMENSION) compptr->h_samp_factor, FALSE);
+ if (x_crop_blocks + dst_blk_x < comp_width) {
+ /* Block is within the mirrorable area. */
+ src_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ comp_width - x_crop_blocks - dst_blk_x -
+ (JDIMENSION) compptr->h_samp_factor,
+ (JDIMENSION) compptr->h_samp_factor, FALSE);
+ } else {
+ /* Edge blocks are transposed but not mirrored. */
+ src_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_x + x_crop_blocks,
+ (JDIMENSION) compptr->h_samp_factor, FALSE);
+ }
for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
- src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
- if (dst_blk_x < comp_width) {
+ dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ if (x_crop_blocks + dst_blk_x < comp_width) {
/* Block is within the mirrorable area. */
- dst_ptr = dst_buffer[offset_y]
- [comp_width - dst_blk_x - offset_x - 1];
+ src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+ [dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++) {
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -277,7 +433,8 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
} else {
/* Edge blocks are transposed but not mirrored. */
- dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ src_ptr = src_buffer[offset_x]
+ [dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++)
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -292,6 +449,7 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* 270 degree rotation is equivalent to
@@ -301,6 +459,7 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*/
{
JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_x, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JCOEFPTR src_ptr, dst_ptr;
@@ -310,11 +469,13 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* at the (output) bottom edge properly. They just get transposed and
* not mirrored.
*/
- MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+ MCU_rows = srcinfo->image_width / (dstinfo->max_v_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_height = MCU_rows * compptr->v_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
@@ -324,14 +485,15 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
dst_blk_x += compptr->h_samp_factor) {
src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_x + x_crop_blocks,
(JDIMENSION) compptr->h_samp_factor, FALSE);
for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
- if (dst_blk_y < comp_height) {
+ if (y_crop_blocks + dst_blk_y < comp_height) {
/* Block is within the mirrorable area. */
src_ptr = src_buffer[offset_x]
- [comp_height - dst_blk_y - offset_y - 1];
+ [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
for (i = 0; i < DCTSIZE; i++) {
for (j = 0; j < DCTSIZE; j++) {
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -341,7 +503,8 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
} else {
/* Edge blocks are transposed but not mirrored. */
- src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
+ src_ptr = src_buffer[offset_x]
+ [dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++)
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -356,6 +519,7 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* 180 degree rotation is equivalent to
@@ -365,89 +529,93 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*/
{
JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JBLOCKROW src_row_ptr, dst_row_ptr;
JCOEFPTR src_ptr, dst_ptr;
jpeg_component_info *compptr;
- MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
- MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+ MCU_cols = srcinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+ MCU_rows = srcinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_width = MCU_cols * compptr->h_samp_factor;
comp_height = MCU_rows * compptr->v_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
(JDIMENSION) compptr->v_samp_factor, TRUE);
- if (dst_blk_y < comp_height) {
+ if (y_crop_blocks + dst_blk_y < comp_height) {
/* Row is within the vertically mirrorable area. */
src_buffer = (*srcinfo->mem->access_virt_barray)
((j_common_ptr) srcinfo, src_coef_arrays[ci],
- comp_height - dst_blk_y - (JDIMENSION) compptr->v_samp_factor,
+ comp_height - y_crop_blocks - dst_blk_y -
+ (JDIMENSION) compptr->v_samp_factor,
(JDIMENSION) compptr->v_samp_factor, FALSE);
} else {
/* Bottom-edge rows are only mirrored horizontally. */
src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_y,
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_y + y_crop_blocks,
(JDIMENSION) compptr->v_samp_factor, FALSE);
}
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
- if (dst_blk_y < comp_height) {
+ dst_row_ptr = dst_buffer[offset_y];
+ if (y_crop_blocks + dst_blk_y < comp_height) {
/* Row is within the mirrorable area. */
- dst_row_ptr = dst_buffer[offset_y];
src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
- /* Process the blocks that can be mirrored both ways. */
- for (dst_blk_x = 0; dst_blk_x < comp_width; dst_blk_x++) {
+ for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
dst_ptr = dst_row_ptr[dst_blk_x];
- src_ptr = src_row_ptr[comp_width - dst_blk_x - 1];
- for (i = 0; i < DCTSIZE; i += 2) {
- /* For even row, negate every odd column. */
- for (j = 0; j < DCTSIZE; j += 2) {
- *dst_ptr++ = *src_ptr++;
- *dst_ptr++ = - *src_ptr++;
+ if (x_crop_blocks + dst_blk_x < comp_width) {
+ /* Process the blocks that can be mirrored both ways. */
+ src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+ for (i = 0; i < DCTSIZE; i += 2) {
+ /* For even row, negate every odd column. */
+ for (j = 0; j < DCTSIZE; j += 2) {
+ *dst_ptr++ = *src_ptr++;
+ *dst_ptr++ = - *src_ptr++;
+ }
+ /* For odd row, negate every even column. */
+ for (j = 0; j < DCTSIZE; j += 2) {
+ *dst_ptr++ = - *src_ptr++;
+ *dst_ptr++ = *src_ptr++;
+ }
}
- /* For odd row, negate every even column. */
- for (j = 0; j < DCTSIZE; j += 2) {
- *dst_ptr++ = - *src_ptr++;
- *dst_ptr++ = *src_ptr++;
+ } else {
+ /* Any remaining right-edge blocks are only mirrored vertically. */
+ src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+ for (i = 0; i < DCTSIZE; i += 2) {
+ for (j = 0; j < DCTSIZE; j++)
+ *dst_ptr++ = *src_ptr++;
+ for (j = 0; j < DCTSIZE; j++)
+ *dst_ptr++ = - *src_ptr++;
}
}
}
- /* Any remaining right-edge blocks are only mirrored vertically. */
- for (; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
- dst_ptr = dst_row_ptr[dst_blk_x];
- src_ptr = src_row_ptr[dst_blk_x];
- for (i = 0; i < DCTSIZE; i += 2) {
- for (j = 0; j < DCTSIZE; j++)
- *dst_ptr++ = *src_ptr++;
- for (j = 0; j < DCTSIZE; j++)
- *dst_ptr++ = - *src_ptr++;
- }
- }
} else {
/* Remaining rows are just mirrored horizontally. */
- dst_row_ptr = dst_buffer[offset_y];
src_row_ptr = src_buffer[offset_y];
- /* Process the blocks that can be mirrored. */
- for (dst_blk_x = 0; dst_blk_x < comp_width; dst_blk_x++) {
- dst_ptr = dst_row_ptr[dst_blk_x];
- src_ptr = src_row_ptr[comp_width - dst_blk_x - 1];
- for (i = 0; i < DCTSIZE2; i += 2) {
- *dst_ptr++ = *src_ptr++;
- *dst_ptr++ = - *src_ptr++;
+ for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+ if (x_crop_blocks + dst_blk_x < comp_width) {
+ /* Process the blocks that can be mirrored. */
+ dst_ptr = dst_row_ptr[dst_blk_x];
+ src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+ for (i = 0; i < DCTSIZE2; i += 2) {
+ *dst_ptr++ = *src_ptr++;
+ *dst_ptr++ = - *src_ptr++;
+ }
+ } else {
+ /* Any remaining right-edge blocks are only copied. */
+ jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+ dst_row_ptr + dst_blk_x,
+ (JDIMENSION) 1);
}
}
- /* Any remaining right-edge blocks are only copied. */
- for (; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
- dst_ptr = dst_row_ptr[dst_blk_x];
- src_ptr = src_row_ptr[dst_blk_x];
- for (i = 0; i < DCTSIZE2; i++)
- *dst_ptr++ = *src_ptr++;
- }
}
}
}
@@ -457,6 +625,7 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
LOCAL(void)
do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+ JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
jvirt_barray_ptr *src_coef_arrays,
jvirt_barray_ptr *dst_coef_arrays)
/* Transverse transpose is equivalent to
@@ -470,18 +639,21 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
*/
{
JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+ JDIMENSION x_crop_blocks, y_crop_blocks;
int ci, i, j, offset_x, offset_y;
JBLOCKARRAY src_buffer, dst_buffer;
JCOEFPTR src_ptr, dst_ptr;
jpeg_component_info *compptr;
- MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
- MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+ MCU_cols = srcinfo->image_height / (dstinfo->max_h_samp_factor * DCTSIZE);
+ MCU_rows = srcinfo->image_width / (dstinfo->max_v_samp_factor * DCTSIZE);
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
comp_width = MCU_cols * compptr->h_samp_factor;
comp_height = MCU_rows * compptr->v_samp_factor;
+ x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+ y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
dst_blk_y += compptr->v_samp_factor) {
dst_buffer = (*srcinfo->mem->access_virt_barray)
@@ -490,17 +662,26 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
dst_blk_x += compptr->h_samp_factor) {
- src_buffer = (*srcinfo->mem->access_virt_barray)
- ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
- (JDIMENSION) compptr->h_samp_factor, FALSE);
+ if (x_crop_blocks + dst_blk_x < comp_width) {
+ /* Block is within the mirrorable area. */
+ src_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ comp_width - x_crop_blocks - dst_blk_x -
+ (JDIMENSION) compptr->h_samp_factor,
+ (JDIMENSION) compptr->h_samp_factor, FALSE);
+ } else {
+ src_buffer = (*srcinfo->mem->access_virt_barray)
+ ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+ dst_blk_x + x_crop_blocks,
+ (JDIMENSION) compptr->h_samp_factor, FALSE);
+ }
for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
- if (dst_blk_y < comp_height) {
- src_ptr = src_buffer[offset_x]
- [comp_height - dst_blk_y - offset_y - 1];
- if (dst_blk_x < comp_width) {
+ dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ if (y_crop_blocks + dst_blk_y < comp_height) {
+ if (x_crop_blocks + dst_blk_x < comp_width) {
/* Block is within the mirrorable area. */
- dst_ptr = dst_buffer[offset_y]
- [comp_width - dst_blk_x - offset_x - 1];
+ src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+ [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
for (i = 0; i < DCTSIZE; i++) {
for (j = 0; j < DCTSIZE; j++) {
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -516,7 +697,8 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
} else {
/* Right-edge blocks are mirrored in y only */
- dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ src_ptr = src_buffer[offset_x]
+ [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
for (i = 0; i < DCTSIZE; i++) {
for (j = 0; j < DCTSIZE; j++) {
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -526,11 +708,10 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
}
} else {
- src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
- if (dst_blk_x < comp_width) {
+ if (x_crop_blocks + dst_blk_x < comp_width) {
/* Bottom-edge blocks are mirrored in x only */
- dst_ptr = dst_buffer[offset_y]
- [comp_width - dst_blk_x - offset_x - 1];
+ src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+ [dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++) {
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -540,7 +721,8 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
} else {
/* At lower right corner, just transpose, no mirroring */
- dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+ src_ptr = src_buffer[offset_x]
+ [dst_blk_y + offset_y + y_crop_blocks];
for (i = 0; i < DCTSIZE; i++)
for (j = 0; j < DCTSIZE; j++)
dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
@@ -554,8 +736,116 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
}
+/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
+ * Returns TRUE if valid integer found, FALSE if not.
+ * *strptr is advanced over the digit string, and *result is set to its value.
+ *
+ * On a FALSE return (no digit at *strptr) *strptr is left unchanged and
+ * *result has been set to 0.  Digit strings whose value does not fit in
+ * JDIMENSION are not detected here.
+ */
+
+LOCAL(boolean)
+jt_read_integer (const char ** strptr, JDIMENSION * result)
+{
+  const char * ptr = *strptr;
+  JDIMENSION val = 0;
+
+  /* The <ctype.h> classifiers are only defined for arguments representable
+   * as unsigned char (or EOF); plain char may be negative for non-ASCII
+   * input, so convert before classifying.
+   */
+  for (; isdigit((unsigned char) *ptr); ptr++) {
+    val = val * 10 + (JDIMENSION) (*ptr - '0');
+  }
+  *result = val;
+  if (ptr == *strptr)
+    return FALSE;		/* oops, no digits */
+  *strptr = ptr;
+  return TRUE;
+}
+
+
+/* Parse a crop specification (written in X11 geometry style).
+ * The routine returns TRUE if the spec string is valid, FALSE if not.
+ *
+ * The crop spec string should have the format
+ *	<width>x<height>{+-}<xoffset>{+-}<yoffset>
+ * where width, height, xoffset, and yoffset are unsigned integers.
+ * Each of the elements can be omitted to indicate a default value.
+ * (A weakness of this style is that it is not possible to omit xoffset
+ * while specifying yoffset, since they look alike.)
+ *
+ * On entry info->crop is cleared and all four *_set fields are reset to
+ * JCROP_UNSET; info->crop becomes TRUE only if the entire string parses.
+ * After a FALSE return, crop fields taken from the valid leading part of
+ * the string may already have been stored in *info.
+ *
+ * This code is loosely based on XParseGeometry from the X11 distribution.
+ */
+
+GLOBAL(boolean)
+jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec)
+{
+  info->crop = FALSE;
+  info->crop_width_set = JCROP_UNSET;
+  info->crop_height_set = JCROP_UNSET;
+  info->crop_xoffset_set = JCROP_UNSET;
+  info->crop_yoffset_set = JCROP_UNSET;
+
+  /* Convert to unsigned char first: passing a negative plain char to
+   * isdigit() is undefined behavior.
+   */
+  if (isdigit((unsigned char) *spec)) {
+    /* fetch width */
+    if (! jt_read_integer(&spec, &info->crop_width))
+      return FALSE;
+    info->crop_width_set = JCROP_POS;
+  }
+  if (*spec == 'x' || *spec == 'X') {
+    /* fetch height; an 'x' must be followed by at least one digit */
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_height))
+      return FALSE;
+    info->crop_height_set = JCROP_POS;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch xoffset; the sign only selects which edge it is measured from */
+    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_xoffset))
+      return FALSE;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch yoffset */
+    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_yoffset))
+      return FALSE;
+  }
+  /* We had better have gotten to the end of the string. */
+  if (*spec != '\0')
+    return FALSE;
+  info->crop = TRUE;
+  return TRUE;
+}
+
+
+/* Trim off any partial iMCUs on the indicated destination edge */
+
+LOCAL(void)
+trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width)
+{
+  /* Pixel width of one destination iMCU, then the number of complete
+   * iMCU columns in the current output width and in full_width.
+   */
+  JDIMENSION iMCU_width = (JDIMENSION) (info->max_h_samp_factor * DCTSIZE);
+  JDIMENSION out_cols = info->output_width / iMCU_width;
+  JDIMENSION full_cols = full_width / iMCU_width;
+
+  if (out_cols == 0)
+    return;			/* never trim the output down to zero width */
+  /* Leave the width alone unless the crop offset plus the output's
+   * complete columns lands exactly on the last complete column of the
+   * full-width image.
+   */
+  if (info->x_crop_offset + out_cols != full_cols)
+    return;
+  info->output_width = out_cols * iMCU_width;
+}
+
+/* Vertical counterpart of trim_right_edge: drop a partial iMCU row from
+ * the bottom of the output when the output reaches the last complete
+ * iMCU row of an image that is full_height pixels tall.
+ */
+LOCAL(void)
+trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height)
+{
+  /* Pixel height of one destination iMCU, then the number of complete
+   * iMCU rows in the current output height and in full_height.
+   */
+  JDIMENSION iMCU_height = (JDIMENSION) (info->max_v_samp_factor * DCTSIZE);
+  JDIMENSION out_rows = info->output_height / iMCU_height;
+  JDIMENSION full_rows = full_height / iMCU_height;
+
+  if (out_rows == 0)
+    return;			/* never trim the output down to zero height */
+  if (info->y_crop_offset + out_rows != full_rows)
+    return;
+  info->output_height = out_rows * iMCU_height;
+}
+
+
/* Request any required workspace.
*
+ * This routine figures out the size that the output image will be
+ * (which implies that all the transform parameters must be set before
+ * it is called).
+ *
* We allocate the workspace virtual arrays from the source decompression
* object, so that all the arrays (both the original data and the workspace)
* will be taken into account while making memory management decisions.
@@ -569,9 +859,13 @@ jtransform_request_workspace (j_decompress_ptr srcinfo,
jpeg_transform_info *info)
{
jvirt_barray_ptr *coef_arrays = NULL;
+ boolean need_workspace, transpose_it;
jpeg_component_info *compptr;
- int ci;
+ JDIMENSION xoffset, yoffset, width_in_iMCUs, height_in_iMCUs;
+ JDIMENSION width_in_blocks, height_in_blocks;
+ int ci, h_samp_factor, v_samp_factor;
+ /* Determine number of components in output image */
if (info->force_grayscale &&
srcinfo->jpeg_color_space == JCS_YCbCr &&
srcinfo->num_components == 3) {
@@ -581,55 +875,181 @@ jtransform_request_workspace (j_decompress_ptr srcinfo,
/* Process all the components */
info->num_components = srcinfo->num_components;
}
+ /* If there is only one output component, force the iMCU size to be 1;
+ * else use the source iMCU size. (This allows us to do the right thing
+ * when reducing color to grayscale, and also provides a handy way of
+ * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
+ */
+
+ switch (info->transform) {
+ case JXFORM_TRANSPOSE:
+ case JXFORM_TRANSVERSE:
+ case JXFORM_ROT_90:
+ case JXFORM_ROT_270:
+ info->output_width = srcinfo->image_height;
+ info->output_height = srcinfo->image_width;
+ if (info->num_components == 1) {
+ info->max_h_samp_factor = 1;
+ info->max_v_samp_factor = 1;
+ } else {
+ info->max_h_samp_factor = srcinfo->max_v_samp_factor;
+ info->max_v_samp_factor = srcinfo->max_h_samp_factor;
+ }
+ break;
+ default:
+ info->output_width = srcinfo->image_width;
+ info->output_height = srcinfo->image_height;
+ if (info->num_components == 1) {
+ info->max_h_samp_factor = 1;
+ info->max_v_samp_factor = 1;
+ } else {
+ info->max_h_samp_factor = srcinfo->max_h_samp_factor;
+ info->max_v_samp_factor = srcinfo->max_v_samp_factor;
+ }
+ break;
+ }
+
+ /* If cropping has been requested, compute the crop area's position and
+ * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
+ */
+ if (info->crop) {
+ /* Insert default values for unset crop parameters */
+ if (info->crop_xoffset_set == JCROP_UNSET)
+ info->crop_xoffset = 0; /* default to +0 */
+ if (info->crop_yoffset_set == JCROP_UNSET)
+ info->crop_yoffset = 0; /* default to +0 */
+ if (info->crop_xoffset >= info->output_width ||
+ info->crop_yoffset >= info->output_height)
+ ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+ if (info->crop_width_set == JCROP_UNSET)
+ info->crop_width = info->output_width - info->crop_xoffset;
+ if (info->crop_height_set == JCROP_UNSET)
+ info->crop_height = info->output_height - info->crop_yoffset;
+ /* Ensure parameters are valid */
+ if (info->crop_width <= 0 || info->crop_width > info->output_width ||
+ info->crop_height <= 0 || info->crop_height > info->output_height ||
+ info->crop_xoffset > info->output_width - info->crop_width ||
+ info->crop_yoffset > info->output_height - info->crop_height)
+ ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+ /* Convert negative crop offsets into regular offsets */
+ if (info->crop_xoffset_set == JCROP_NEG)
+ xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+ else
+ xoffset = info->crop_xoffset;
+ if (info->crop_yoffset_set == JCROP_NEG)
+ yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+ else
+ yoffset = info->crop_yoffset;
+ /* Now adjust so that upper left corner falls at an iMCU boundary */
+ info->output_width =
+ info->crop_width + (xoffset % (info->max_h_samp_factor * DCTSIZE));
+ info->output_height =
+ info->crop_height + (yoffset % (info->max_v_samp_factor * DCTSIZE));
+ /* Save x/y offsets measured in iMCUs */
+ info->x_crop_offset = xoffset / (info->max_h_samp_factor * DCTSIZE);
+ info->y_crop_offset = yoffset / (info->max_v_samp_factor * DCTSIZE);
+ } else {
+ info->x_crop_offset = 0;
+ info->y_crop_offset = 0;
+ }
+ /* Figure out whether we need workspace arrays,
+ * and if so whether they are transposed relative to the source.
+ */
+ need_workspace = FALSE;
+ transpose_it = FALSE;
switch (info->transform) {
case JXFORM_NONE:
+ if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+ need_workspace = TRUE;
+ /* No workspace needed if neither cropping nor transforming */
+ break;
case JXFORM_FLIP_H:
- /* Don't need a workspace array */
+ if (info->trim)
+ trim_right_edge(info, srcinfo->image_width);
+ if (info->y_crop_offset != 0)
+ need_workspace = TRUE;
+ /* do_flip_h_no_crop doesn't need a workspace array */
break;
case JXFORM_FLIP_V:
- case JXFORM_ROT_180:
- /* Need workspace arrays having same dimensions as source image.
- * Note that we allocate arrays padded out to the next iMCU boundary,
- * so that transform routines need not worry about missing edge blocks.
- */
- coef_arrays = (jvirt_barray_ptr *)
- (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
- SIZEOF(jvirt_barray_ptr) * info->num_components);
- for (ci = 0; ci < info->num_components; ci++) {
- compptr = srcinfo->comp_info + ci;
- coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
- ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
- (JDIMENSION) jround_up((long) compptr->width_in_blocks,
- (long) compptr->h_samp_factor),
- (JDIMENSION) jround_up((long) compptr->height_in_blocks,
- (long) compptr->v_samp_factor),
- (JDIMENSION) compptr->v_samp_factor);
- }
+ if (info->trim)
+ trim_bottom_edge(info, srcinfo->image_height);
+ /* Need workspace arrays having same dimensions as source image. */
+ need_workspace = TRUE;
break;
case JXFORM_TRANSPOSE:
+ /* transpose does NOT have to trim anything */
+ /* Need workspace arrays having transposed dimensions. */
+ need_workspace = TRUE;
+ transpose_it = TRUE;
+ break;
case JXFORM_TRANSVERSE:
+ if (info->trim) {
+ trim_right_edge(info, srcinfo->image_height);
+ trim_bottom_edge(info, srcinfo->image_width);
+ }
+ /* Need workspace arrays having transposed dimensions. */
+ need_workspace = TRUE;
+ transpose_it = TRUE;
+ break;
case JXFORM_ROT_90:
+ if (info->trim)
+ trim_right_edge(info, srcinfo->image_height);
+ /* Need workspace arrays having transposed dimensions. */
+ need_workspace = TRUE;
+ transpose_it = TRUE;
+ break;
+ case JXFORM_ROT_180:
+ if (info->trim) {
+ trim_right_edge(info, srcinfo->image_width);
+ trim_bottom_edge(info, srcinfo->image_height);
+ }
+ /* Need workspace arrays having same dimensions as source image. */
+ need_workspace = TRUE;
+ break;
case JXFORM_ROT_270:
- /* Need workspace arrays having transposed dimensions.
- * Note that we allocate arrays padded out to the next iMCU boundary,
- * so that transform routines need not worry about missing edge blocks.
- */
+ if (info->trim)
+ trim_bottom_edge(info, srcinfo->image_width);
+ /* Need workspace arrays having transposed dimensions. */
+ need_workspace = TRUE;
+ transpose_it = TRUE;
+ break;
+ }
+
+ /* Allocate workspace if needed.
+ * Note that we allocate arrays padded out to the next iMCU boundary,
+ * so that transform routines need not worry about missing edge blocks.
+ */
+ if (need_workspace) {
coef_arrays = (jvirt_barray_ptr *)
(*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
- SIZEOF(jvirt_barray_ptr) * info->num_components);
+ SIZEOF(jvirt_barray_ptr) * info->num_components);
+ width_in_iMCUs = (JDIMENSION)
+ jdiv_round_up((long) info->output_width,
+ (long) (info->max_h_samp_factor * DCTSIZE));
+ height_in_iMCUs = (JDIMENSION)
+ jdiv_round_up((long) info->output_height,
+ (long) (info->max_v_samp_factor * DCTSIZE));
for (ci = 0; ci < info->num_components; ci++) {
compptr = srcinfo->comp_info + ci;
+ if (info->num_components == 1) {
+ /* we're going to force samp factors to 1x1 in this case */
+ h_samp_factor = v_samp_factor = 1;
+ } else if (transpose_it) {
+ h_samp_factor = compptr->v_samp_factor;
+ v_samp_factor = compptr->h_samp_factor;
+ } else {
+ h_samp_factor = compptr->h_samp_factor;
+ v_samp_factor = compptr->v_samp_factor;
+ }
+ width_in_blocks = width_in_iMCUs * h_samp_factor;
+ height_in_blocks = height_in_iMCUs * v_samp_factor;
coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
- (JDIMENSION) jround_up((long) compptr->height_in_blocks,
- (long) compptr->v_samp_factor),
- (JDIMENSION) jround_up((long) compptr->width_in_blocks,
- (long) compptr->h_samp_factor),
- (JDIMENSION) compptr->h_samp_factor);
+ width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
}
- break;
}
+
info->workspace_coef_arrays = coef_arrays;
}
@@ -642,14 +1062,8 @@ transpose_critical_parameters (j_compress_ptr dstinfo)
int tblno, i, j, ci, itemp;
jpeg_component_info *compptr;
JQUANT_TBL *qtblptr;
- JDIMENSION dtemp;
UINT16 qtemp;
- /* Transpose basic image dimensions */
- dtemp = dstinfo->image_width;
- dstinfo->image_width = dstinfo->image_height;
- dstinfo->image_height = dtemp;
-
/* Transpose sampling factors */
for (ci = 0; ci < dstinfo->num_components; ci++) {
compptr = dstinfo->comp_info + ci;
@@ -674,46 +1088,159 @@ transpose_critical_parameters (j_compress_ptr dstinfo)
}
-/* Trim off any partial iMCUs on the indicated destination edge */
+/* Adjust Exif image parameters.
+ *
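+ * data/length describe the TIFF structure of an Exif APP1 segment (the
+ * bytes following the "Exif\0\0" identifier).  If the Exif SubIFD can be
+ * located, its ExifImageWidth (0xA002) and ExifImageHeight (0xA003) tags
+ * are overwritten in place with new_width and new_height; if the data is
+ * malformed or truncated it is left as found.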
+ */
LOCAL(void)
-trim_right_edge (j_compress_ptr dstinfo)
+adjust_exif_parameters (JOCTET FAR * data, unsigned int length,
+ JDIMENSION new_width, JDIMENSION new_height)
{
- int ci, max_h_samp_factor;
- JDIMENSION MCU_cols;
+ boolean is_motorola; /* Flag for byte order */
+ unsigned int number_of_tags, tagnum;
+ unsigned int firstoffset, offset;
+ JDIMENSION new_value;
+
+ if (length < 12) return; /* Length of an IFD entry */
+
+ /* Discover byte order */
+ if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49)
+ is_motorola = FALSE;
+ else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D)
+ is_motorola = TRUE;
+ else
+ return;
+
+ /* Check Tag Mark */
+ if (is_motorola) {
+ if (GETJOCTET(data[2]) != 0) return;
+ if (GETJOCTET(data[3]) != 0x2A) return;
+ } else {
+ if (GETJOCTET(data[3]) != 0) return;
+ if (GETJOCTET(data[2]) != 0x2A) return;
+ }
- /* We have to compute max_h_samp_factor ourselves,
- * because it hasn't been set yet in the destination
- * (and we don't want to use the source's value).
- */
- max_h_samp_factor = 1;
- for (ci = 0; ci < dstinfo->num_components; ci++) {
- int h_samp_factor = dstinfo->comp_info[ci].h_samp_factor;
- max_h_samp_factor = MAX(max_h_samp_factor, h_samp_factor);
+ /* Get first IFD offset (offset to IFD0) */
+ if (is_motorola) {
+ if (GETJOCTET(data[4]) != 0) return;
+ if (GETJOCTET(data[5]) != 0) return;
+ firstoffset = GETJOCTET(data[6]);
+ firstoffset <<= 8;
+ firstoffset += GETJOCTET(data[7]);
+ } else {
+ if (GETJOCTET(data[7]) != 0) return;
+ if (GETJOCTET(data[6]) != 0) return;
+ firstoffset = GETJOCTET(data[5]);
+ firstoffset <<= 8;
+ firstoffset += GETJOCTET(data[4]);
}
- MCU_cols = dstinfo->image_width / (max_h_samp_factor * DCTSIZE);
- if (MCU_cols > 0) /* can't trim to 0 pixels */
- dstinfo->image_width = MCU_cols * (max_h_samp_factor * DCTSIZE);
-}
+ if (firstoffset > length - 2) return; /* check end of data segment */
-LOCAL(void)
-trim_bottom_edge (j_compress_ptr dstinfo)
-{
- int ci, max_v_samp_factor;
- JDIMENSION MCU_rows;
+ /* Get the number of directory entries contained in this IFD */
+ if (is_motorola) {
+ number_of_tags = GETJOCTET(data[firstoffset]);
+ number_of_tags <<= 8;
+ number_of_tags += GETJOCTET(data[firstoffset+1]);
+ } else {
+ number_of_tags = GETJOCTET(data[firstoffset+1]);
+ number_of_tags <<= 8;
+ number_of_tags += GETJOCTET(data[firstoffset]);
+ }
+ if (number_of_tags == 0) return;
+ firstoffset += 2;
+
+ /* Search for ExifSubIFD offset Tag in IFD0 */
+ for (;;) {
+ if (firstoffset > length - 12) return; /* check end of data segment */
+ /* Get Tag number */
+ if (is_motorola) {
+ tagnum = GETJOCTET(data[firstoffset]);
+ tagnum <<= 8;
+ tagnum += GETJOCTET(data[firstoffset+1]);
+ } else {
+ tagnum = GETJOCTET(data[firstoffset+1]);
+ tagnum <<= 8;
+ tagnum += GETJOCTET(data[firstoffset]);
+ }
+ if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
+ if (--number_of_tags == 0) return;
+ firstoffset += 12;
+ }
- /* We have to compute max_v_samp_factor ourselves,
- * because it hasn't been set yet in the destination
- * (and we don't want to use the source's value).
- */
- max_v_samp_factor = 1;
- for (ci = 0; ci < dstinfo->num_components; ci++) {
- int v_samp_factor = dstinfo->comp_info[ci].v_samp_factor;
- max_v_samp_factor = MAX(max_v_samp_factor, v_samp_factor);
+ /* Get the ExifSubIFD offset */
+ if (is_motorola) {
+ if (GETJOCTET(data[firstoffset+8]) != 0) return;
+ if (GETJOCTET(data[firstoffset+9]) != 0) return;
+ offset = GETJOCTET(data[firstoffset+10]);
+ offset <<= 8;
+ offset += GETJOCTET(data[firstoffset+11]);
+ } else {
+ if (GETJOCTET(data[firstoffset+11]) != 0) return;
+ if (GETJOCTET(data[firstoffset+10]) != 0) return;
+ offset = GETJOCTET(data[firstoffset+9]);
+ offset <<= 8;
+ offset += GETJOCTET(data[firstoffset+8]);
}
- MCU_rows = dstinfo->image_height / (max_v_samp_factor * DCTSIZE);
- if (MCU_rows > 0) /* can't trim to 0 pixels */
- dstinfo->image_height = MCU_rows * (max_v_samp_factor * DCTSIZE);
+ if (offset > length - 2) return; /* check end of data segment */
+
+ /* Get the number of directory entries contained in this SubIFD */
+ if (is_motorola) {
+ number_of_tags = GETJOCTET(data[offset]);
+ number_of_tags <<= 8;
+ number_of_tags += GETJOCTET(data[offset+1]);
+ } else {
+ number_of_tags = GETJOCTET(data[offset+1]);
+ number_of_tags <<= 8;
+ number_of_tags += GETJOCTET(data[offset]);
+ }
+ if (number_of_tags < 2) return;
+ offset += 2;
+
+ /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
+ do {
+ if (offset > length - 12) return; /* check end of data segment */
+ /* Get Tag number */
+ if (is_motorola) {
+ tagnum = GETJOCTET(data[offset]);
+ tagnum <<= 8;
+ tagnum += GETJOCTET(data[offset+1]);
+ } else {
+ tagnum = GETJOCTET(data[offset+1]);
+ tagnum <<= 8;
+ tagnum += GETJOCTET(data[offset]);
+ }
+ if (tagnum == 0xA002 || tagnum == 0xA003) {
+ if (tagnum == 0xA002)
+ new_value = new_width; /* ExifImageWidth Tag */
+ else
+ new_value = new_height; /* ExifImageHeight Tag */
+ if (is_motorola) {
+ data[offset+2] = 0; /* Format = unsigned long (4 octets) */
+ data[offset+3] = 4;
+ data[offset+4] = 0; /* Number Of Components = 1 */
+ data[offset+5] = 0;
+ data[offset+6] = 0;
+ data[offset+7] = 1;
+ data[offset+8] = 0;
+ data[offset+9] = 0;
+ data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
+ data[offset+11] = (JOCTET)(new_value & 0xFF);
+ } else {
+ data[offset+2] = 4; /* Format = unsigned long (4 octets) */
+ data[offset+3] = 0;
+ data[offset+4] = 1; /* Number Of Components = 1 */
+ data[offset+5] = 0;
+ data[offset+6] = 0;
+ data[offset+7] = 0;
+ data[offset+8] = (JOCTET)(new_value & 0xFF);
+ data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
+ data[offset+10] = 0;
+ data[offset+11] = 0;
+ }
+ }
+ offset += 12;
+ } while (--number_of_tags);
}
@@ -736,18 +1263,22 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo,
{
/* If force-to-grayscale is requested, adjust destination parameters */
if (info->force_grayscale) {
- /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
- * properly. Among other things, the target h_samp_factor & v_samp_factor
- * will get set to 1, which typically won't match the source.
- * In fact we do this even if the source is already grayscale; that
- * provides an easy way of coercing a grayscale JPEG with funny sampling
- * factors to the customary 1,1. (Some decoders fail on other factors.)
+ /* First, ensure we have YCbCr or grayscale data, and that the source's
+ * Y channel is full resolution. (No reasonable person would make Y
+ * be less than full resolution, so actually coping with that case
+ * isn't worth extra code space. But we check it to avoid crashing.)
*/
- if ((dstinfo->jpeg_color_space == JCS_YCbCr &&
- dstinfo->num_components == 3) ||
- (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
- dstinfo->num_components == 1)) {
- /* We have to preserve the source's quantization table number. */
+ if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+ dstinfo->num_components == 3) ||
+ (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+ dstinfo->num_components == 1)) &&
+ srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+ srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+ /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
+ * properly. Among other things, it sets the target h_samp_factor &
+ * v_samp_factor to 1, which typically won't match the source.
+ * We have to preserve the source's quantization table number, however.
+ */
int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
@@ -755,50 +1286,52 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo,
/* Sorry, can't do it */
ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
}
+ } else if (info->num_components == 1) {
+ /* For a single-component source, we force the destination sampling factors
+ * to 1x1, with or without force_grayscale. This is useful because some
+ * decoders choke on grayscale images with other sampling factors.
+ */
+ dstinfo->comp_info[0].h_samp_factor = 1;
+ dstinfo->comp_info[0].v_samp_factor = 1;
}
- /* Correct the destination's image dimensions etc if necessary */
+ /* Correct the destination's image dimensions as necessary
+ * for crop and rotate/flip operations.
+ */
+ dstinfo->image_width = info->output_width;
+ dstinfo->image_height = info->output_height;
+
+ /* Transpose destination image parameters */
switch (info->transform) {
- case JXFORM_NONE:
- /* Nothing to do */
- break;
- case JXFORM_FLIP_H:
- if (info->trim)
- trim_right_edge(dstinfo);
- break;
- case JXFORM_FLIP_V:
- if (info->trim)
- trim_bottom_edge(dstinfo);
- break;
case JXFORM_TRANSPOSE:
- transpose_critical_parameters(dstinfo);
- /* transpose does NOT have to trim anything */
- break;
case JXFORM_TRANSVERSE:
- transpose_critical_parameters(dstinfo);
- if (info->trim) {
- trim_right_edge(dstinfo);
- trim_bottom_edge(dstinfo);
- }
- break;
case JXFORM_ROT_90:
- transpose_critical_parameters(dstinfo);
- if (info->trim)
- trim_right_edge(dstinfo);
- break;
- case JXFORM_ROT_180:
- if (info->trim) {
- trim_right_edge(dstinfo);
- trim_bottom_edge(dstinfo);
- }
- break;
case JXFORM_ROT_270:
transpose_critical_parameters(dstinfo);
- if (info->trim)
- trim_bottom_edge(dstinfo);
break;
}
+ /* Adjust Exif properties */
+ if (srcinfo->marker_list != NULL &&
+ srcinfo->marker_list->marker == JPEG_APP0+1 &&
+ srcinfo->marker_list->data_length >= 6 &&
+ GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 &&
+ GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 &&
+ GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 &&
+ GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 &&
+ GETJOCTET(srcinfo->marker_list->data[4]) == 0 &&
+ GETJOCTET(srcinfo->marker_list->data[5]) == 0) {
+ /* Suppress output of JFIF marker */
+ dstinfo->write_JFIF_header = FALSE;
+ /* Adjust Exif image parameters */
+ if (dstinfo->image_width != srcinfo->image_width ||
+ dstinfo->image_height != srcinfo->image_height)
+ /* Align data segment to start of TIFF structure for parsing */
+ adjust_exif_parameters(srcinfo->marker_list->data + 6,
+ srcinfo->marker_list->data_length - 6,
+ dstinfo->image_width, dstinfo->image_height);
+ }
+
/* Return the appropriate output data set */
if (info->workspace_coef_arrays != NULL)
return info->workspace_coef_arrays;
@@ -816,40 +1349,108 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo,
*/
GLOBAL(void)
-jtransform_execute_transformation (j_decompress_ptr srcinfo,
- j_compress_ptr dstinfo,
- jvirt_barray_ptr *src_coef_arrays,
- jpeg_transform_info *info)
+jtransform_execute_transform (j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo,
+ jvirt_barray_ptr *src_coef_arrays,
+ jpeg_transform_info *info)
{
jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
+ /* Note: conditions tested here should match those in switch statement
+ * in jtransform_request_workspace()
+ */
switch (info->transform) {
case JXFORM_NONE:
+ if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+ do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_FLIP_H:
- do_flip_h(srcinfo, dstinfo, src_coef_arrays);
+ if (info->y_crop_offset != 0)
+ do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
+ else
+ do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
+ src_coef_arrays);
break;
case JXFORM_FLIP_V:
- do_flip_v(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_TRANSPOSE:
- do_transpose(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_TRANSVERSE:
- do_transverse(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_ROT_90:
- do_rot_90(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_ROT_180:
- do_rot_180(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
case JXFORM_ROT_270:
- do_rot_270(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+ do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+ src_coef_arrays, dst_coef_arrays);
break;
}
}
+/* jtransform_perfect_transform
+ *
+ * Report whether a transformation can be applied perfectly to an image,
+ * i.e. whether every source dimension that matters for the transform is
+ * an exact multiple of the corresponding MCU dimension.
+ *
+ * Inputs:
+ *   image_width, image_height: source image dimensions.
+ *   MCU_width, MCU_height: pixel dimensions of MCU.
+ *   transform: transformation identifier.
+ * Parameter sources from initialized jpeg_struct
+ * (after reading source header):
+ *   image_width = cinfo.image_width
+ *   image_height = cinfo.image_height
+ *   MCU_width = cinfo.max_h_samp_factor * DCTSIZE
+ *   MCU_height = cinfo.max_v_samp_factor * DCTSIZE
+ * Result:
+ *   TRUE = perfect transformation possible
+ *   FALSE = perfect transformation not possible
+ *           (may use custom action then)
+ */
+
+GLOBAL(boolean)
+jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
+			     int MCU_width, int MCU_height,
+			     JXFORM_CODE transform)
+{
+  boolean width_matters = FALSE;	/* must width be a multiple of MCU_width? */
+  boolean height_matters = FALSE;	/* must height be a multiple of MCU_height? */
+  boolean perfect = TRUE;
+
+  /* Decide which source dimensions constrain this transform */
+  switch (transform) {
+  case JXFORM_FLIP_H:
+  case JXFORM_ROT_270:
+    width_matters = TRUE;
+    break;
+  case JXFORM_FLIP_V:
+  case JXFORM_ROT_90:
+    height_matters = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_180:
+    width_matters = TRUE;
+    height_matters = TRUE;
+    break;
+  default:
+    /* every other code is always reported as perfect */
+    break;
+  }
+
+  if (width_matters && image_width % (JDIMENSION) MCU_width != 0)
+    perfect = FALSE;
+  if (height_matters && image_height % (JDIMENSION) MCU_height != 0)
+    perfect = FALSE;
+
+  return perfect;
+}
+
#endif /* TRANSFORMS_SUPPORTED */
diff --git a/transupp.h b/transupp.h
index 5c2d32a..981b1ce 100644
--- a/transupp.h
+++ b/transupp.h
@@ -1,7 +1,7 @@
/*
* transupp.h
*
- * Copyright (C) 1997, Thomas G. Lane.
+ * Copyright (C) 1997-2001, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@@ -22,32 +22,6 @@
#define TRANSFORMS_SUPPORTED 1 /* 0 disables transform code */
#endif
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_request_workspace jTrRequest
-#define jtransform_adjust_parameters jTrAdjust
-#define jtransform_execute_transformation jTrExec
-#define jcopy_markers_setup jCMrkSetup
-#define jcopy_markers_execute jCMrkExec
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * Codes for supported types of image transformations.
- */
-
-typedef enum {
- JXFORM_NONE, /* no transformation */
- JXFORM_FLIP_H, /* horizontal flip */
- JXFORM_FLIP_V, /* vertical flip */
- JXFORM_TRANSPOSE, /* transpose across UL-to-LR axis */
- JXFORM_TRANSVERSE, /* transpose across UR-to-LL axis */
- JXFORM_ROT_90, /* 90-degree clockwise rotation */
- JXFORM_ROT_180, /* 180-degree rotation */
- JXFORM_ROT_270 /* 270-degree clockwise (or 90 ccw) */
-} JXFORM_CODE;
-
/*
* Although rotating and flipping data expressed as DCT coefficients is not
* hard, there is an asymmetry in the JPEG format specification for images
@@ -75,6 +49,19 @@ typedef enum {
* (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
* followed by -rot 180 -trim trims both edges.)
*
+ * We also offer a lossless-crop option, which discards data outside a given
+ * image region but losslessly preserves what is inside. Like the rotate and
+ * flip transforms, lossless crop is restricted by the JPEG format: the upper
+ * left corner of the selected region must fall on an iMCU boundary. If this
+ * does not hold for the given crop parameters, we silently move the upper left
+ * corner up and/or left to make it so, simultaneously increasing the region
+ * dimensions to keep the lower right crop corner unchanged. (Thus, the
+ * output image covers at least the requested region, but may cover more.)
+ *
+ * If both crop and a rotate/flip transform are requested, the crop is applied
+ * last --- that is, the crop region is specified in terms of the destination
+ * image.
+ *
* We also offer a "force to grayscale" option, which simply discards the
* chrominance channels of a YCbCr image. This is lossless in the sense that
* the luminance channel is preserved exactly. It's not the same kind of
@@ -83,20 +70,89 @@ typedef enum {
* be aware of the option to know how many components to work on.
*/
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jtransform_parse_crop_spec jTrParCrop
+#define jtransform_request_workspace jTrRequest
+#define jtransform_adjust_parameters jTrAdjust
+#define jtransform_execute_transform jTrExec
+#define jtransform_perfect_transform jTrPerfect
+#define jcopy_markers_setup jCMrkSetup
+#define jcopy_markers_execute jCMrkExec
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * Codes for supported types of image transformations.
+ *
+ * For the four transposing codes (TRANSPOSE, TRANSVERSE, ROT_90, ROT_270)
+ * jtransform_request_workspace starts from output dimensions equal to the
+ * source dimensions exchanged (output width = source height and vice versa);
+ * for all other codes the output starts out the same size as the source.
+ */
+
+typedef enum {
+ JXFORM_NONE, /* no transformation */
+ JXFORM_FLIP_H, /* horizontal flip */
+ JXFORM_FLIP_V, /* vertical flip */
+ JXFORM_TRANSPOSE, /* transpose across UL-to-LR axis */
+ JXFORM_TRANSVERSE, /* transpose across UR-to-LL axis */
+ JXFORM_ROT_90, /* 90-degree clockwise rotation */
+ JXFORM_ROT_180, /* 180-degree rotation */
+ JXFORM_ROT_270 /* 270-degree clockwise (or 90 ccw) */
+} JXFORM_CODE;
+
+/*
+ * Codes for crop parameters, which can individually be unspecified,
+ * positive, or negative. (Negative width or height makes no sense, though.)
+ *
+ * jtransform_parse_crop_spec only ever assigns JCROP_NEG to the two offset
+ * codes; a width or height that is present is always marked JCROP_POS.
+ */
+
+typedef enum {
+ JCROP_UNSET, /* not specified: jtransform_request_workspace supplies a default */
+ JCROP_POS, /* specified; an offset is measured from the left/top edge */
+ JCROP_NEG /* offset written with '-': measured from the right/bottom edge */
+} JCROP_CODE;
+
+/*
+ * Transform parameters struct.
+ * NB: application must not change any elements of this struct after
+ * calling jtransform_request_workspace.
+ *
+ * The "internal workspace" members at the end are computed and stored by
+ * jtransform_request_workspace, and are read back by
+ * jtransform_adjust_parameters and jtransform_execute_transform.
+ */
+
typedef struct {
/* Options: set by caller */
JXFORM_CODE transform; /* image transform operator */
+ boolean perfect; /* if TRUE, fail if partial MCUs are requested */
boolean trim; /* if TRUE, trim partial MCUs as needed */
boolean force_grayscale; /* if TRUE, convert color image to grayscale */
+ boolean crop; /* if TRUE, crop source image */
+
+ /* Crop parameters: application need not set these unless crop is TRUE.
+ * These can be filled in by jtransform_parse_crop_spec().
+ */
+ JDIMENSION crop_width; /* Width of selected region */
+ JCROP_CODE crop_width_set; /* JCROP_UNSET: use all width right of xoffset */
+ JDIMENSION crop_height; /* Height of selected region */
+ JCROP_CODE crop_height_set; /* JCROP_UNSET: use all height below yoffset */
+ JDIMENSION crop_xoffset; /* X offset of selected region */
+ JCROP_CODE crop_xoffset_set; /* (negative measures from right edge) */
+ JDIMENSION crop_yoffset; /* Y offset of selected region */
+ JCROP_CODE crop_yoffset_set; /* (negative measures from bottom edge) */
/* Internal workspace: caller should not touch these */
int num_components; /* # of components in workspace */
jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
+ JDIMENSION output_width; /* cropped destination dimensions */
+ JDIMENSION output_height; /* (copied to the destination's image_width/height) */
+ JDIMENSION x_crop_offset; /* destination crop offsets measured in iMCUs */
+ JDIMENSION y_crop_offset; /* (both 0 when crop is FALSE) */
+ int max_h_samp_factor; /* destination iMCU size */
+ int max_v_samp_factor; /* (in blocks; multiply by DCTSIZE for pixels) */
} jpeg_transform_info;
#if TRANSFORMS_SUPPORTED
+/* Parse a crop specification (written in X11 geometry style) */
+EXTERN(boolean) jtransform_parse_crop_spec
+ JPP((jpeg_transform_info *info, const char *spec));
/* Request any required workspace */
EXTERN(void) jtransform_request_workspace
JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
@@ -106,10 +162,24 @@ EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info));
/* Execute the actual transformation, if any */
-EXTERN(void) jtransform_execute_transformation
+EXTERN(void) jtransform_execute_transform
JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info));
+/* Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ */
+EXTERN(boolean) jtransform_perfect_transform
+ JPP((JDIMENSION image_width, JDIMENSION image_height,
+ int MCU_width, int MCU_height,
+ JXFORM_CODE transform));
+
+/* jtransform_execute_transform used to be called
+ * jtransform_execute_transformation, but some compilers complain about
+ * routine names that long. This macro is here to avoid breaking any
+ * old source code that uses the original name...
+ */
+#define jtransform_execute_transformation jtransform_execute_transform
#endif /* TRANSFORMS_SUPPORTED */