diff options
author | Michael Vrhel <michael.vrhel@artifex.com> | 2011-04-13 22:17:12 +0000 |
---|---|---|
committer | Michael Vrhel <michael.vrhel@artifex.com> | 2011-04-13 22:17:12 +0000 |
commit | 4a3666ee2833e877a26ef75fece00fd6fad14c93 (patch) | |
tree | 6197e8aec27f50fda03d9675a582937a10c5e912 | |
parent | 3791878570227c7f5f1c07891c066fab3db2e5d3 (diff) | |
download | ghostpdl-4a3666ee2833e877a26ef75fece00fd6fad14c93.tar.gz |
Reorganization of the thresholding code to make it easier to get the threshold operation working for color image input, as well as for the case where the output goes to a CMYK planar device.
git-svn-id: http://svn.ghostscript.com/ghostscript/trunk@12396 a1074d23-0009-0410-80fe-cf8c14f379e6
-rw-r--r-- | gs/base/gxht_thresh.c | 609 | ||||
-rw-r--r-- | gs/base/gxht_thresh.h | 5 | ||||
-rw-r--r-- | gs/base/gxicolor.c | 20 | ||||
-rw-r--r-- | gs/base/gximono.c | 361 | ||||
-rw-r--r-- | gs/base/lib.mak | 2 |
5 files changed, 493 insertions, 504 deletions
diff --git a/gs/base/gxht_thresh.c b/gs/base/gxht_thresh.c index 47b398d3d..a63570cf6 100644 --- a/gs/base/gxht_thresh.c +++ b/gs/base/gxht_thresh.c @@ -37,6 +37,7 @@ #else #define __align16 __declspec(align(16)) #endif +#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
#ifdef HAVE_SSE2 @@ -357,159 +358,455 @@ gxht_thresh_image_init(gx_image_enum *penum) int code = 0; fixed ox, oy; int temp; - int dev_width, max_height; - int spp_out; - - if (gx_device_must_halftone(penum->dev)) { - if (penum->pis != NULL && penum->pis->dev_ht != NULL) { - gx_ht_order *d_order = &(penum->pis->dev_ht->components[0].corder); - code = gx_ht_construct_threshold(d_order, penum->dev, penum->pis, 0); - } else { - return -1; - } - } - spp_out = penum->dev->color_info.num_components; - /* If the image is landscaped then we want to maintain a buffer - that is sufficiently large so that we can hold a byte - of halftoned data along the column. This way we avoid doing - multiple writes into the same position over and over. - The size of the buffer we need depends upon the bitdepth of - the output device, the number of device coloranants and the - number of colorants in the source space. Note we will - need to eventually consider multi-level halftone case - here too. For now, to make use of the SSE2 stuff, we would - like to have 16 bytes of data to process at a time. So we - will collect the columns of data in a buffer that is 16 wide. - We will also keep track of the widths of each column. When - the total width count reaches 16, we will create our - threshold array and apply it. We may have one column that is - buffered between calls in this case. Also if a call is made - with h=0 we will flush the buffer as we are at the end of the - data. 
*/ - if (penum->posture == image_landscape) { - int col_length = - fixed2int_var_rounded(any_abs(penum->x_extent.y)) * spp_out; - ox = dda_current(penum->dda.pixel0.x); - oy = dda_current(penum->dda.pixel0.y); - temp = (int) ceil((float) col_length/16.0); - penum->line_size = temp * 16; /* The stride */ - /* Now we need at most 16 of these */ - penum->line = gs_alloc_bytes(penum->memory, - 16 * penum->line_size + 16, - "gxht_thresh"); - /* Same with this */ - penum->thresh_buffer = gs_alloc_bytes(penum->memory, - penum->line_size * 16 + 16, - "gxht_thresh"); - /* That maps into 2 bytes of Halftone data */ - penum->ht_buffer = gs_alloc_bytes(penum->memory, - penum->line_size * 2, - "gxht_thresh"); - penum->ht_stride = penum->line_size; - if (penum->line == NULL || penum->thresh_buffer == NULL - || penum->ht_buffer == NULL) - return -1; - penum->ht_landscape.count = 0; - penum->ht_landscape.num_contones = 0; - if (penum->y_extent.x < 0) { - /* Going right to left */ - penum->ht_landscape.curr_pos = 15; - penum->ht_landscape.index = -1; - } else { - /* Going left to right */ - penum->ht_landscape.curr_pos = 0; - penum->ht_landscape.index = 1; - } - if (penum->x_extent.y < 0) { - penum->ht_landscape.flipy = true; - penum->ht_landscape.y_pos = - fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y); - } else { - penum->ht_landscape.flipy = false; - penum->ht_landscape.y_pos = - fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y)); - } - memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*16); - penum->ht_landscape.offset_set = false; - penum->ht_offset_bits = 0; /* Will get set in call to render */ - if (code >= 0) { -#if defined(DEBUG) || defined(PACIFY_VALGRIND) - memset(penum->line, 0, 16 * penum->line_size + 16); - memset(penum->ht_buffer, 0, penum->line_size * 2); - memset(penum->thresh_buffer, 0, 16 * penum->line_size + 16); -#endif - } - } else { - /* In the portrait case we allocate a single line buffer - in device width, a 
threshold buffer of the same size - and possibly wider and the buffer for the halftoned - bits. We have to do a bit of work to enable 16 byte - boundary after an offset to ensure that we can make use - of the SSE2 operations for thresholding. We do the - allocations now to avoid doing them with every line */ - /* Initialize the ht_landscape stuff to zero */ - memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t)); - ox = dda_current(penum->dda.pixel0.x); - oy = dda_current(penum->dda.pixel0.y); - dev_width = - (int) fabs((long) fixed2long_pixround(ox + penum->x_extent.x) - - fixed2long_pixround(ox)); - /* Get the bit position so that we can do a copy_mono for - the left remainder and then 16 bit aligned copies for the - rest. The right remainder will be OK as it will land in - the MSBit positions. Note the #define chunk bits16 in - gdevm1.c. Allow also for a 15 sample over run. - */ - penum->ht_offset_bits = (-fixed2int_var_pixround(ox)) & 15; - if (penum->ht_offset_bits > 0) { - penum->ht_stride = ((7 + (dev_width + 4) * spp_out) / 8) + - ARCH_SIZEOF_LONG; - } else { - penum->ht_stride = ((7 + (dev_width + 2) * spp_out) / 8) + - ARCH_SIZEOF_LONG; - } - /* We want to figure out the maximum height that we may - have in taking a single source row and going to device - space */ - max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) / - (float) penum->Height); - penum->ht_buffer = gs_alloc_bytes(penum->memory, - penum->ht_stride * max_height, - "gxht_thresh"); - /* We want to have 128 bit alignement for our contone and - threshold strips so that we can use SSE operations - in the threshold operation. Add in a minor buffer and offset - to ensure this. If gs_alloc_bytes provides at least 16 - bit alignment so we may need to move 14 bytes. However, the - HT process is split in two operations. 
One that involves - the HT of a left remainder and the rest which ensures that - we pack in the HT data in the bits with no skew for a fast - copy into the gdevm1 device (16 bit copies). So, we - need to account for those pixels which occur first and which - are NOT aligned for the contone buffer. After we offset - by this remainder portion we should be 128 bit aligned. - Also allow a 15 sample over run during the execution. */ - temp = (int) ceil((float) ((dev_width + 15.0) * spp_out + 15.0)/16.0); - penum->line_size = temp * 16; /* The stride */ - penum->line = gs_alloc_bytes(penum->memory, penum->line_size, - "gxht_thresh"); - penum->thresh_buffer = gs_alloc_bytes(penum->memory, - penum->line_size * max_height, - "gxht_thresh"); - if (penum->line == NULL || penum->thresh_buffer == NULL || - penum->ht_buffer == NULL) { - return -1; - } else { -#if defined(DEBUG) || defined(PACIFY_VALGRIND) - memset(penum->line, 0, penum->line_size); - memset(penum->ht_buffer, 0, - penum->ht_stride * max_height); - memset(penum->thresh_buffer, 0, - penum->line_size * max_height); -#endif - } - } - /* Precompute values needed for rasterizing. */ - penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2); - return code; -} + int dev_width, max_height;
+ int spp_out;
+
+ if (gx_device_must_halftone(penum->dev)) {
+ if (penum->pis != NULL && penum->pis->dev_ht != NULL) {
+ gx_ht_order *d_order = &(penum->pis->dev_ht->components[0].corder);
+ code = gx_ht_construct_threshold(d_order, penum->dev, penum->pis, 0);
+ } else {
+ return -1;
+ }
+ }
+ spp_out = penum->dev->color_info.num_components;
+ /* If the image is landscaped then we want to maintain a buffer
+ that is sufficiently large so that we can hold a byte
+ of halftoned data along the column. This way we avoid doing
+ multiple writes into the same position over and over.
+ The size of the buffer we need depends upon the bitdepth of
+ the output device, the number of device coloranants and the
+ number of colorants in the source space. Note we will
+ need to eventually consider multi-level halftone case
+ here too. For now, to make use of the SSE2 stuff, we would
+ like to have 16 bytes of data to process at a time. So we
+ will collect the columns of data in a buffer that is 16 wide.
+ We will also keep track of the widths of each column. When
+ the total width count reaches 16, we will create our
+ threshold array and apply it. We may have one column that is
+ buffered between calls in this case. Also if a call is made
+ with h=0 we will flush the buffer as we are at the end of the
+ data. */
+ if (penum->posture == image_landscape) {
+ int col_length =
+ fixed2int_var_rounded(any_abs(penum->x_extent.y)) * spp_out;
+ ox = dda_current(penum->dda.pixel0.x);
+ oy = dda_current(penum->dda.pixel0.y);
+ temp = (int) ceil((float) col_length/16.0);
+ penum->line_size = temp * 16; /* The stride */
+ /* Now we need at most 16 of these */
+ penum->line = gs_alloc_bytes(penum->memory,
+ 16 * penum->line_size + 16,
+ "gxht_thresh");
+ /* Same with this */
+ penum->thresh_buffer = gs_alloc_bytes(penum->memory,
+ penum->line_size * 16 + 16,
+ "gxht_thresh");
+ /* That maps into 2 bytes of Halftone data */
+ penum->ht_buffer = gs_alloc_bytes(penum->memory,
+ penum->line_size * 2,
+ "gxht_thresh");
+ penum->ht_stride = penum->line_size;
+ if (penum->line == NULL || penum->thresh_buffer == NULL
+ || penum->ht_buffer == NULL)
+ return -1;
+ penum->ht_landscape.count = 0;
+ penum->ht_landscape.num_contones = 0;
+ if (penum->y_extent.x < 0) {
+ /* Going right to left */
+ penum->ht_landscape.curr_pos = 15;
+ penum->ht_landscape.index = -1;
+ } else {
+ /* Going left to right */
+ penum->ht_landscape.curr_pos = 0;
+ penum->ht_landscape.index = 1;
+ }
+ if (penum->x_extent.y < 0) {
+ penum->ht_landscape.flipy = true;
+ penum->ht_landscape.y_pos =
+ fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y);
+ } else {
+ penum->ht_landscape.flipy = false;
+ penum->ht_landscape.y_pos =
+ fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
+ }
+ memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*16);
+ penum->ht_landscape.offset_set = false;
+ penum->ht_offset_bits = 0; /* Will get set in call to render */
+ if (code >= 0) {
+#if defined(DEBUG) || defined(PACIFY_VALGRIND)
+ memset(penum->line, 0, 16 * penum->line_size + 16);
+ memset(penum->ht_buffer, 0, penum->line_size * 2);
+ memset(penum->thresh_buffer, 0, 16 * penum->line_size + 16);
+#endif
+ }
+ } else {
+ /* In the portrait case we allocate a single line buffer
+ in device width, a threshold buffer of the same size
+ and possibly wider and the buffer for the halftoned
+ bits. We have to do a bit of work to enable 16 byte
+ boundary after an offset to ensure that we can make use
+ of the SSE2 operations for thresholding. We do the
+ allocations now to avoid doing them with every line */
+ /* Initialize the ht_landscape stuff to zero */
+ memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
+ ox = dda_current(penum->dda.pixel0.x);
+ oy = dda_current(penum->dda.pixel0.y);
+ dev_width =
+ (int) fabs((long) fixed2long_pixround(ox + penum->x_extent.x) -
+ fixed2long_pixround(ox));
+ /* Get the bit position so that we can do a copy_mono for
+ the left remainder and then 16 bit aligned copies for the
+ rest. The right remainder will be OK as it will land in
+ the MSBit positions. Note the #define chunk bits16 in
+ gdevm1.c. Allow also for a 15 sample over run.
+ */
+ penum->ht_offset_bits = (-fixed2int_var_pixround(ox)) & 15;
+ if (penum->ht_offset_bits > 0) {
+ penum->ht_stride = ((7 + (dev_width + 4) * spp_out) / 8) +
+ ARCH_SIZEOF_LONG;
+ } else {
+ penum->ht_stride = ((7 + (dev_width + 2) * spp_out) / 8) +
+ ARCH_SIZEOF_LONG;
+ }
+ /* We want to figure out the maximum height that we may
+ have in taking a single source row and going to device
+ space */
+ max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
+ (float) penum->Height);
+ penum->ht_buffer = gs_alloc_bytes(penum->memory,
+ penum->ht_stride * max_height,
+ "gxht_thresh");
+ /* We want to have 128 bit alignement for our contone and
+ threshold strips so that we can use SSE operations
+ in the threshold operation. Add in a minor buffer and offset
+ to ensure this. If gs_alloc_bytes provides at least 16
+ bit alignment so we may need to move 14 bytes. However, the
+ HT process is split in two operations. One that involves
+ the HT of a left remainder and the rest which ensures that
+ we pack in the HT data in the bits with no skew for a fast
+ copy into the gdevm1 device (16 bit copies). So, we
+ need to account for those pixels which occur first and which
+ are NOT aligned for the contone buffer. After we offset
+ by this remainder portion we should be 128 bit aligned.
+ Also allow a 15 sample over run during the execution. */
+ temp = (int) ceil((float) ((dev_width + 15.0) * spp_out + 15.0)/16.0);
+ penum->line_size = temp * 16; /* The stride */
+ penum->line = gs_alloc_bytes(penum->memory, penum->line_size,
+ "gxht_thresh");
+ penum->thresh_buffer = gs_alloc_bytes(penum->memory,
+ penum->line_size * max_height,
+ "gxht_thresh");
+ if (penum->line == NULL || penum->thresh_buffer == NULL ||
+ penum->ht_buffer == NULL) {
+ return -1;
+ } else {
+#if defined(DEBUG) || defined(PACIFY_VALGRIND)
+ memset(penum->line, 0, penum->line_size);
+ memset(penum->ht_buffer, 0,
+ penum->ht_stride * max_height);
+ memset(penum->thresh_buffer, 0,
+ penum->line_size * max_height);
+#endif
+ }
+ }
+ /* Precompute values needed for rasterizing. */
+ penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
+ return code;
+}
+
+/* Fill one row of a threshold strip by tiling a single threshold row.
+
+   dest_strip receives, in order:
+     - left_width bytes taken from src_strip starting at left_offset,
+     - num_tiles complete copies of the src_width byte row,
+     - right_width bytes taken from the start of src_strip.
+
+   When PACIFY_VALGRIND is defined, the bytes from the end of the written
+   data up to the next multiple of 16 (counted from dest_strip) are zeroed. */
+static void
+fill_threshhold_buffer(byte *dest_strip, byte *src_strip, int src_width,
+                       int left_offset, int left_width, int num_tiles,
+                       int right_width)
+{
+    byte *out = dest_strip + left_width;
+    int remaining;
+
+    /* Leading partial tile */
+    memcpy(dest_strip, src_strip + left_offset, left_width);
+    /* Complete tiles */
+    for (remaining = num_tiles; remaining > 0; remaining--) {
+        memcpy(out, src_strip, src_width);
+        out += src_width;
+    }
+    /* Trailing partial tile */
+    memcpy(out, src_strip, right_width);
+#ifdef PACIFY_VALGRIND
+    {
+        int pad;
+
+        out += right_width;
+        /* Negative of the written length, modulo 16 */
+        pad = (dest_strip - out) & 15;
+        if (pad > 0)
+            memset(out, 0, pad);
+    }
+#endif
+}
+
+/* Called when a landscape chunk has been painted but count exceeded the
+   number of positions consumed (num_used).  The column next to curr_pos is
+   moved to the first column for the scan direction (column 15 when index is
+   negative, column 0 otherwise) and the ht_landscape_info_t bookkeeping is
+   reset so that it describes that single buffered column.
+
+   contone_align is treated as rows of 16 bytes; data_length rows are
+   moved. */
+static void
+reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
+                       int data_length, int num_used)
+{
+    const int right_to_left = (ht_landscape->index < 0);
+    const int leftover = ht_landscape->count - num_used;
+    int src_col, dst_col;
+    int row;
+
+    memset(&(ht_landscape->widths[0]), 0, sizeof(int)*16);
+    if (right_to_left) {
+        /* The kept column goes to the far right */
+        src_col = ht_landscape->curr_pos + 1;
+        dst_col = 15;
+        ht_landscape->widths[15] = leftover;
+        ht_landscape->curr_pos = 14;
+        ht_landscape->xstart -= num_used;
+    } else {
+        /* The kept column goes to the far left */
+        src_col = ht_landscape->curr_pos - 1;
+        dst_col = 0;
+        ht_landscape->widths[0] = leftover;
+        ht_landscape->curr_pos = 1;
+        ht_landscape->xstart += num_used;
+    }
+    ht_landscape->count = leftover;
+    ht_landscape->num_contones = 1;
+    /* Copy the column one row (16 bytes) at a time */
+    for (row = 0; row < data_length; row++)
+        contone_align[dst_col + 16 * row] = contone_align[src_col + 16 * row];
+}
+
+/* Threshold a single plane of contone data and hand the resulting 1-bit
+   data to the device through its copy_mono procedure.
+
+   The threshold array of d_order is first tiled into thresh_align so that
+   it lines up with the contone data; the contone data is then thresholded
+   into penum->ht_buffer and copied to dev.
+
+   penum          - image enumerator.  Supplies the posture, yci, hci/wci,
+                    ht_buffer, ht_stride, ht_offset_bits and the landscape
+                    buffering state (ht_landscape), which is updated here.
+   d_order        - halftone order whose width x height threshold array is
+                    tiled.
+   xrun           - fixed point x position of the run (portrait only).
+   dest_width     - number of samples in a row (portrait only).
+   dest_height    - number of rows thresholded per chunk in the landscape
+                    case; in portrait it only appears in the debug dump name.
+   thresh_align   - buffer that receives the tiled threshold values.
+   contone_align  - contone data to be thresholded.
+   contone_stride - row stride used for thresh_align and passed to the row
+                    thresholding routine (portrait only).
+   dev            - device that receives the halftoned bits.
+
+   Returns 0 on success, the negative code of a failing copy_mono call, or
+   -1 (through gs_rethrow) when the posture is not portrait or landscape or
+   the order has no usable threshold array. */
+int
+gxht_thresh_plane(gx_image_enum *penum, gx_ht_order *d_order,
+                  fixed xrun, int dest_width, int dest_height,
+                  byte *thresh_align, byte *contone_align, int contone_stride,
+                  gx_device * dev)
+{
+    int thresh_width, thresh_height, dx;
+    int left_rem_end, left_width, vdi;
+    int num_full_tiles, right_tile_width;
+    int k, jj, dy;
+    byte *thresh_tile;
+    int position;
+    bool replicate_tile;
+    image_posture posture = penum->posture;
+    const int y_pos = penum->yci;
+    int width;
+    byte *ptr_out, *row_ptr, *ptr_out_temp;
+    byte *threshold = d_order->threshold;
+    int init_tile, in_row_offset, ii, num_tiles, tile_remainder;
+    int offset_bits = penum->ht_offset_bits;
+    byte *halftone = penum->ht_buffer;
+    int dithered_stride = penum->ht_stride;
+    int code;
+
+    /* Go ahead and fill the threshold line buffer with tiled threshold values.
+       First just grab the row or column that we are going to tile with and
+       then do memcpy into the buffer */
+
+    thresh_width = d_order->width;
+    thresh_height = d_order->height;
+    /* The tile dimensions are used as divisors and the array is read with
+       memcpy below, so refuse to continue without a usable array. */
+    if (threshold == NULL || thresh_width <= 0 || thresh_height <= 0)
+        return gs_rethrow(-1, "Missing threshold array for thresholding");
+    /* Figure out the tile steps.  Left offset, Number of tiles, Right offset. */
+    switch (posture) {
+        case image_portrait:
+            vdi = penum->hci;
+            /* Compute the tiling positions with dest_width */
+            dx = fixed2int_var(xrun) % thresh_width;
+            /* % keeps the sign of the dividend; a negative x position must
+               still map to an offset inside the tile row */
+            if (dx < 0)
+                dx += thresh_width;
+            /* Left remainder part */
+            left_rem_end = min(dx + dest_width, thresh_width);
+            left_width = left_rem_end - dx;  /* The left width of our tile part */
+            /* Now the middle part */
+            num_full_tiles =
+                (int)fastfloor((dest_width - left_width)/ (float) thresh_width);
+            /* Now the right part */
+            right_tile_width = dest_width - num_full_tiles * thresh_width -
+                               left_width;
+            /* Those dimensions stay the same across the set of lines that
+               we fill in our buffer.  Iterate over the vdi and fill up our
+               threshold buffer */
+            for (k = 0; k < vdi; k++) {
+                /* Get a pointer to our tile row */
+                dy = (penum->yci + k + penum->dev->band_offset_y) % thresh_height;
+                /* Same sign issue as for dx above */
+                if (dy < 0)
+                    dy += thresh_height;
+                thresh_tile = threshold + d_order->width * dy;
+                /* Fill the buffer, can be multiple rows.  Make sure
+                   to update with stride */
+                position = contone_stride * k;
+                /* Tile into the 128 bit aligned threshold strip */
+                fill_threshhold_buffer(&(thresh_align[position]),
+                                       thresh_tile, thresh_width, dx, left_width,
+                                       num_full_tiles, right_tile_width);
+            }
+            /* Apply the threshold operation */
+#if RAW_HT_DUMP
+            {
+                /* Debug only: dump the byte-per-sample halftone result to a
+                   raw file instead of sending it to the device.  The locals
+                   are declared here because nothing else in this function
+                   provides them. */
+                FILE *fid;
+                char file_name[50];
+                int spp_out = penum->dev->color_info.num_components;
+
+                gx_ht_threshold_row_byte(contone_align, thresh_align, contone_stride,
+                                  halftone, dithered_stride, dest_width, vdi);
+                sprintf(file_name,"HT_Portrait_%d_%dx%dx%d.raw", penum->id, dest_width,
+                        dest_height, spp_out);
+                fid = fopen(file_name,"a+b");
+                if (fid != NULL) {
+                    fwrite(halftone,1,dest_width * vdi,fid);
+                    fclose(fid);
+                }
+            }
+#else
+            if (offset_bits > dest_width)
+                offset_bits = dest_width;
+            gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
+                              halftone, dithered_stride, dest_width, vdi,
+                              offset_bits);
+            /* FIXME: An improvement here would be to generate the initial
+             * offset_bits at the correct offset within the byte so that they
+             * align with the remainder of the line. This would mean not
+             * always packing them into the first offset_bits (in MSB order)
+             * of our 16 bit word, but rather into the last offset_bits
+             * (in MSB order) (except when the entire run is small!).
+             *
+             * This would enable us to do just one aligned copy_mono call for
+             * the entire scanline. */
+            /* Now do the copy mono operation */
+            /* First the left remainder bits */
+            if (offset_bits > 0) {
+                int x_pos = fixed2int_var(xrun);
+                code = (*dev_proc(dev, copy_mono)) (dev, halftone, 0, dithered_stride,
+                                             gx_no_bitmap_id, x_pos, y_pos,
+                                             offset_bits, vdi,
+                                             (gx_color_index) 0,
+                                             (gx_color_index) 1);
+                if (code < 0)
+                    return code;
+            }
+            if ((dest_width - offset_bits) > 0 ) {
+                /* Now the primary aligned bytes */
+                byte *curr_ptr = halftone;
+                int curr_width = dest_width - offset_bits;
+                int x_pos = fixed2int_var(xrun) + offset_bits;
+                if (offset_bits > 0) {
+                    curr_ptr += 2; /* If the first 2 bytes had the left part then increment */
+                }
+                code = (*dev_proc(dev, copy_mono)) (dev, curr_ptr, 0, dithered_stride,
+                                             gx_no_bitmap_id, x_pos, y_pos,
+                                             curr_width, vdi,
+                                             (gx_color_index) 0, (gx_color_index) 1);
+                if (code < 0)
+                    return code;
+            }
+#endif
+            break;
+        case image_landscape:
+            /* Go ahead and paint the chunk if we have 16 values or a partial
+               to get us in sync with the 1 bit devices 16 bit positions */
+            vdi = penum->wci;
+            while (penum->ht_landscape.count > 15 ||
+                   ((penum->ht_landscape.count >= offset_bits) &&
+                    penum->ht_landscape.offset_set)) {
+                /* Go ahead and 2D tile in the threshold buffer at this time */
+                /* Always work the tiling from the upper left corner of our
+                   16 columns */
+                if (penum->ht_landscape.offset_set) {
+                    width = offset_bits;
+                } else {
+                    width = 16;
+                }
+                if (penum->y_extent.x < 0) {
+                    dx = (penum->ht_landscape.xstart - width + 1) % thresh_width;
+                } else {
+                    dx = penum->ht_landscape.xstart % thresh_width;
+                }
+                /* Keep the column offset inside the tile for negative x */
+                if (dx < 0)
+                    dx += thresh_width;
+                dy = (penum->dev->band_offset_y + penum->ht_landscape.y_pos) % thresh_height;
+                if (dy < 0)
+                    dy += thresh_height;
+                /* Left remainder part */
+                left_rem_end = min(dx + 16, thresh_width);
+                left_width = left_rem_end - dx;
+                /* Now the middle part */
+                num_full_tiles =
+                    (int)fastfloor((float) (16 - left_width)/ (float) thresh_width);
+                /* Now the right part */
+                right_tile_width =
+                    16 - num_full_tiles * thresh_width - left_width;
+                /* Now loop over the y stuff */
+                ptr_out = thresh_align;
+                /* Do this in three parts.  We do a top part, followed by
+                   larger mem copies followed by a bottom partial. After
+                   a slower initial fill we are able to do larger faster
+                   expansions */
+                if (dest_height <= 2 * thresh_height) {
+                    init_tile = dest_height;
+                    replicate_tile = false;
+                } else {
+                    init_tile = thresh_height;
+                    replicate_tile = true;
+                }
+                for (jj = 0; jj < init_tile; jj++) {
+                    in_row_offset = (jj + dy) % thresh_height;
+                    row_ptr = threshold + in_row_offset * thresh_width;
+                    ptr_out_temp = ptr_out;
+                    /* Left part */
+                    memcpy(ptr_out_temp, row_ptr + dx, left_width);
+                    ptr_out_temp += left_width;
+                    /* Now the full tiles */
+                    for (ii = 0; ii < num_full_tiles; ii++) {
+                        memcpy(ptr_out_temp, row_ptr, thresh_width);
+                        ptr_out_temp += thresh_width;
+                    }
+                    /* Now the remainder */
+                    memcpy(ptr_out_temp, row_ptr, right_tile_width);
+                    ptr_out += 16;
+                }
+                if (replicate_tile) {
+                    /* Find out how many we need to copy */
+                    num_tiles =
+                        (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height);
+                    tile_remainder = dest_height - (num_tiles + 1) * thresh_height;
+                    for (jj = 0; jj < num_tiles; jj ++) {
+                        memcpy(ptr_out, thresh_align, 16 * thresh_height);
+                        ptr_out += 16 * thresh_height;
+                    }
+                    /* Now fill in the remainder */
+                    memcpy(ptr_out, thresh_align, 16 * tile_remainder);
+                }
+                /* Apply the threshold operation */
+                gx_ht_threshold_landscape(contone_align, thresh_align,
+                                    penum->ht_landscape, halftone, dest_height);
+                /* Perform the copy mono */
+                penum->ht_landscape.offset_set = false;
+                if (penum->ht_landscape.index < 0) {
+                    code = (*dev_proc(dev, copy_mono)) (dev, halftone, 0, 2,
+                                                 gx_no_bitmap_id,
+                                                 penum->ht_landscape.xstart - width + 1,
+                                                 penum->ht_landscape.y_pos,
+                                                 width, dest_height,
+                                                 (gx_color_index) 0,
+                                                 (gx_color_index) 1);
+                } else {
+                    code = (*dev_proc(dev, copy_mono)) (dev, halftone, 0, 2,
+                                                 gx_no_bitmap_id,
+                                                 penum->ht_landscape.xstart,
+                                                 penum->ht_landscape.y_pos,
+                                                 width, dest_height,
+                                                 (gx_color_index) 0,
+                                                 (gx_color_index) 1);
+                }
+                if (code < 0)
+                    return code;
+                /* Clean up and reset our buffer.  We may have a line left
+                   over that has to be maintained due to line replication in the
+                   resolution conversion */
+                if (width != penum->ht_landscape.count) {
+                    reset_landscape_buffer(&(penum->ht_landscape), contone_align,
+                                           dest_height, width);
+                } else {
+                    /* Reset the whole buffer */
+                    penum->ht_landscape.count = 0;
+                    if (penum->ht_landscape.index < 0) {
+                        /* Going right to left */
+                        penum->ht_landscape.curr_pos = 15;
+                    } else {
+                        /* Going left to right */
+                        penum->ht_landscape.curr_pos = 0;
+                    }
+                    penum->ht_landscape.num_contones = 0;
+                    memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*16);
+                }
+            }
+            break;
+        default:
+            return gs_rethrow(-1, "Invalid orientation for thresholding");
+    }
+    return 0;
+}
diff --git a/gs/base/gxht_thresh.h b/gs/base/gxht_thresh.h index eb28cbb39..f1ae5fcb7 100644 --- a/gs/base/gxht_thresh.h +++ b/gs/base/gxht_thresh.h @@ -32,7 +32,10 @@ void gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip, void gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align, ht_landscape_info_t ht_landscape, byte *halftone, int data_length); - int gxht_thresh_image_init(gx_image_enum *penum); +int gxht_thresh_plane(gx_image_enum *penum, gx_ht_order *d_order,
+ fixed xrun, int dest_width, int dest_height,
+ byte *thresh_align, byte *contone_align, int contone_stride,
+ gx_device * dev); #endif /* gshtx_INCLUDED */ diff --git a/gs/base/gxicolor.c b/gs/base/gxicolor.c index 14d31311b..238aeeeb0 100644 --- a/gs/base/gxicolor.c +++ b/gs/base/gxicolor.c @@ -38,6 +38,7 @@ #include "gsicc_cms.h" #include "gxcie.h" #include "gscie.h" +#include "gzht.h" #include "gxht_thresh.h" @@ -351,11 +352,26 @@ image_color_icc_prep(gx_image_enum *penum_orig, const byte *psrc, uint w, } static int -image_render_color_thresh(gx_image_enum *penum_orig, const byte *buffer, int data_x, +image_render_color_thresh(gx_image_enum *penum, const byte *buffer, int data_x, uint w, int h, gx_device * dev) { + int code; + int spp = penum->spp; + const byte *psrc_initial = buffer + data_x * spp; + const byte *psrc = psrc_initial; + int spp_cm = 0; + byte *psrc_cm = NULL, *psrc_cm_start = NULL, *psrc_decode = NULL; + byte *bufend = NULL; - + if (h == 0) + return 0; + /* Get the buffer into the device color space */ + code = image_color_icc_prep(penum, psrc, w, dev, &spp_cm, &psrc_cm, + &psrc_cm_start, &psrc_decode, &bufend); + /* Data is now in the proper destination color space. 
Now we want + to go ahead and get the data into the proper spatial setting and then + threshold */ + return 0; } diff --git a/gs/base/gximono.c b/gs/base/gximono.c index da8321456..3f006b664 100644 --- a/gs/base/gximono.c +++ b/gs/base/gximono.c @@ -740,71 +740,6 @@ err: return code; } -static void -fill_threshhold_buffer(byte *dest_strip, byte *src_strip, int src_width, - int left_offset, int left_width, int num_tiles, - int right_width) -{ - byte *ptr_out_temp = dest_strip; - int ii; - - /* Left part */ - memcpy(dest_strip, src_strip + left_offset, left_width); - ptr_out_temp += left_width; - /* Now the full parts */ - for (ii = 0; ii < num_tiles; ii++){ - memcpy(ptr_out_temp, src_strip, src_width); - ptr_out_temp += src_width; - } - /* Now the remainder */ - memcpy(ptr_out_temp, src_strip, right_width); -#ifdef PACIFY_VALGRIND - ptr_out_temp += right_width; - ii = (dest_strip-ptr_out_temp) & 15; - if (ii > 0) - memset(ptr_out_temp, 0, ii); -#endif -} - - -/* If we are in here, we had data left over. 
Move it to the proper position - and get ht_landscape_info_t set properly */ -static void -reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align, - int data_length, int num_used) -{ - int k; - int position_curr, position_new, delta; - int curr_x_pos = ht_landscape->xstart; - - if (ht_landscape->index < 0) { - /* Moving right to left, move column to far right */ - position_curr = ht_landscape->curr_pos + 1; - position_new = 15; - delta = ht_landscape->count - num_used; - memset(&(ht_landscape->widths[0]), 0, sizeof(int)*16); - ht_landscape->widths[15] = delta; - ht_landscape->curr_pos = 14; - ht_landscape->xstart = curr_x_pos - num_used; - } else { - /* Moving left to right, move column to far left */ - position_curr = ht_landscape->curr_pos - 1; - position_new = 0; - delta = ht_landscape->count - num_used; - memset(&(ht_landscape->widths[0]), 0, sizeof(int)*16); - ht_landscape->widths[0] = delta; - ht_landscape->curr_pos = 1; - ht_landscape->xstart = curr_x_pos + num_used; - } - ht_landscape->count = delta; - ht_landscape->num_contones = 1; - for (k = 0; k < data_length; k++) { - contone_align[position_new] = contone_align[position_curr]; - position_curr += 16; - position_new += 16; - } -} - /* An image render case where the source color is monochrome or indexed and the output is to be halftoned. 
If the source color requires decoding, @@ -815,45 +750,27 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x uint w, int h, gx_device * dev) { gx_image_enum *penum = penum_orig; /* const within proc */ - gx_dda_fixed_point pnext; image_posture posture = penum->posture; int vdi; /* amounts to replicate */ fixed xrun; - byte *contone_align, *thresh_align, *halftone; + byte *contone_align, *thresh_align; int spp_out = penum->dev->color_info.num_components; byte *devc_contone; const byte *psrc = buffer + data_x; int dest_width, dest_height, data_length; byte *dev_value, *color_cache; gx_ht_order *d_order = &(penum->pis->dev_ht->components[0].corder); - byte *threshold = d_order->threshold; - byte *thresh_tile; - int thresh_width, thresh_height; - int dx, dy; - int left_rem_end, left_width, right_tile_width; - int num_full_tiles; - int dithered_stride; int position, k; int offset_bits = penum->ht_offset_bits; - int contone_stride; - const int y_pos = penum->yci; + int contone_stride = 0; /* Not used in landscape case */ fixed scale_factor, offset; - int in_row_offset, jj, ii; - byte *row_ptr, *ptr_out_temp, *ptr_out; - int init_tile, num_tiles, tile_remainder; - bool replicate_tile; - int width; int src_size; bool flush_buff = false; byte *psrc_temp; int offset_contone; /* to ensure 128 bit boundary */ int offset_threshold; /* to ensure 128 bit boundary */ gx_dda_int_t dda_ht; - -#if RAW_HT_DUMP - FILE *fid; - char file_name[50]; -#endif + int code; if (h == 0) { if (penum->ht_landscape.count == 0 || posture == image_portrait) { @@ -861,10 +778,13 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x } else { /* Need to flush the buffer */ offset_bits = penum->ht_landscape.count; + penum->ht_offset_bits = offset_bits; penum->ht_landscape.offset_set = true; flush_buff = true; } } + src_size = (penum->rect.w - 1.0); + switch (posture) { case image_portrait: /* Figure out our offset in the contone and threshold 
data @@ -875,39 +795,19 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x penum->ht_offset_bits)) & 15; offset_threshold = (- (((long)(penum->thresh_buffer)) + penum->ht_offset_bits)) & 15; - pnext = penum->dda.pixel0; - xrun = dda_current(pnext.x); + xrun = dda_current(penum->dda.pixel0.x); xrun = xrun - penum->adjust + (fixed_half - fixed_epsilon); dest_width = fixed2int_var_rounded(any_abs(penum->x_extent.x)); if (penum->x_extent.x < 0) xrun += penum->x_extent.x; vdi = penum->hci; data_length = dest_width; - src_size = (penum->rect.w - 1.0); dest_height = fixed2int_var_rounded(any_abs(penum->y_extent.y)); contone_stride = penum->line_size; scale_factor = float2fixed_rounded((float) src_size / (float) (dest_width - 1)); -#if RAW_HT_DUMP - dithered_stride = data_length * spp_out; - offset_bits = 0; - thresh_align = gs_alloc_bytes(penum->memory, contone_stride * vdi, - "image_render_mono_ht"); - halftone = gs_alloc_bytes(penum->memory, dithered_stride * vdi, - "image_render_mono_ht"); - contone_align = gs_alloc_bytes(penum->memory, contone_stride * spp_out, - "image_render_mono_ht"); - if (contone_align == NULL || thresh_align == NULL || halftone == NULL) - return gs_rethrow(gs_error_VMerror, "Memory allocation failure"); -#else - /* Get the pointers to our buffers */ - dithered_stride = penum->ht_stride; - halftone = penum->ht_buffer; - contone_align = penum->line + offset_contone; - thresh_align = penum->thresh_buffer + offset_threshold; -#endif #ifdef DEBUG /* Help in spotting problems */ - memset(halftone,0x00, dithered_stride * vdi); + memset(penum->ht_buffer,0x00, penum->ht_stride * vdi); #endif break; case image_landscape: @@ -922,25 +822,12 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x dest_width = fixed2int_var_rounded(any_abs(penum->y_extent.x)); dest_height = fixed2int_var_rounded(any_abs(penum->x_extent.y)); data_length = dest_height; - src_size = (penum->rect.w - 1.0); scale_factor 
= float2fixed_rounded((float) src_size / (float) (dest_height - 1)); /* In the landscaped case, we want to accumulate multiple columns of data before sending to the device. We want to have a full byte of HT data in one write. This may not be possible at the left or right and for those and for those we have so send partial chunks */ -#if RAW_HT_DUMP - dithered_stride = data_length * spp_out; - offset_bits = 0; - thresh_align = gs_alloc_bytes(penum->memory, contone_stride * vdi, - "image_render_mono_ht"); - halftone = gs_alloc_bytes(penum->memory, dithered_stride * vdi, - "image_render_mono_ht"); - contone_align = gs_alloc_bytes(penum->memory, contone_stride * spp_out, - "image_render_mono_ht"); - if (contone_align == NULL || thresh_align == NULL || halftone == NULL) - return gs_rethrow(gs_error_VMerror, "Memory allocation failure"); -#else /* Initialize our xstart and compute our partial bit chunk so that we get in sync with the 1 bit mem device 16 bit positions for the rest of the chunks */ @@ -965,17 +852,15 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x penum->ht_offset_bits = offset_bits; } } - /* Get the pointers to our buffers */ - dithered_stride = penum->ht_stride; - halftone = penum->ht_buffer; - contone_align = penum->line + offset_contone; - thresh_align = penum->thresh_buffer + offset_threshold; -#endif break; } + /* Get the pointers to our buffers */ + contone_align = penum->line + offset_contone; + thresh_align = penum->thresh_buffer + offset_threshold; + + if (flush_buff) goto flush; /* All done */ /* Set up the dda. We could move this out but the cost is pretty small */ dda_init(dda_ht, 0, src_size, data_length-1); - if (flush_buff) goto flush; /* All done */ devc_contone = contone_align; if (penum->color_cache == NULL) { /* No look-up in the cache to fill the source buffer. 
Still need to @@ -1110,221 +995,9 @@ image_render_mono_ht(gx_image_enum * penum_orig, const byte * buffer, int data_x break; } } - /* Go ahead and fill the threshold line buffer with tiled threshold values. - First just grab the row or column that we are going to tile with and - then do memcpy into the buffer */ + /* Apply threshold array to image data */ flush: - thresh_width = d_order->width; - thresh_height = d_order->height; - /* Figure out the tile steps. Left offset, Number of tiles, Right offset. */ - switch (posture) { - case image_portrait: - /* Compute the tiling positions with dest_width */ - dx = fixed2int_var(xrun) % thresh_width; - /* Left remainder part */ - left_rem_end = min(dx + dest_width, thresh_width); - left_width = left_rem_end - dx; /* The left width of our tile part */ - /* Now the middle part */ - num_full_tiles = - (int)fastfloor((dest_width - left_width)/ (float) thresh_width); - /* Now the right part */ - right_tile_width = dest_width - num_full_tiles * thresh_width - - left_width; - /* Those dimensions stay the same across the set of lines that - we fill in our buffer. Iterate over the vdi and fill up our - threshold buffer */ - for (k = 0; k < vdi; k++) { - /* Get a pointer to our tile row */ - dy = (penum->yci + k + penum->dev->band_offset_y) % thresh_height; - thresh_tile = threshold + d_order->width * dy; - /* Fill the buffer, can be multiple rows. 
Make sure - to update with stride */ - position = contone_stride * k; - /* Tile into the 128 bit aligned threshold strip */ - fill_threshhold_buffer(&(thresh_align[position]), - thresh_tile, thresh_width, dx, left_width, - num_full_tiles, right_tile_width); - } - /* Apply the threshold operation */ -#if RAW_HT_DUMP - gx_ht_threshold_row_byte(contone_align, thresh_align, contone_stride, - halftone, dithered_stride, dest_width, vdi); - sprintf(file_name,"HT_Portrait_%d_%dx%dx%d.raw", penum->id, dest_width, - dest_height, spp_out); - fid = fopen(file_name,"a+b"); - fwrite(halftone,1,dest_width * vdi,fid); - fclose(fid); -#else - if (offset_bits > dest_width) - offset_bits = dest_width; - gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride, - halftone, dithered_stride, dest_width, vdi, - offset_bits); - /* FIXME: An improvement here would be to generate the initial - * offset_bits at the correct offset within the byte so that they - * align with the remainder of the line. This would mean not - * always packing them into the first offset_bits (in MSB order) - * of our 16 bit word, but rather into the last offset_bits - * (in MSB order) (except when the entire run is small!). - * - * This would enable us to do just one aligned copy_mono call for - * the entire scanline. 
*/ - /* Now do the copy mono operation */ - /* First the left remainder bits */ - if (offset_bits > 0) { - int x_pos = fixed2int_var(xrun); - (*dev_proc(dev, copy_mono)) (dev, halftone, 0, dithered_stride, - gx_no_bitmap_id, x_pos, y_pos, - offset_bits, vdi, - (gx_color_index) 0, - (gx_color_index) 1); - } - if ((dest_width - offset_bits) > 0 ) { - /* Now the primary aligned bytes */ - byte *curr_ptr = halftone; - int curr_width = dest_width - offset_bits; - int x_pos = fixed2int_var(xrun) + offset_bits; - if (offset_bits > 0) { - curr_ptr += 2; /* If the first 2 bytes had the left part then increment */ - } - (*dev_proc(dev, copy_mono)) (dev, curr_ptr, 0, dithered_stride, - gx_no_bitmap_id, x_pos, y_pos, - curr_width, vdi, - (gx_color_index) 0, (gx_color_index) 1); - } -#endif - break; - case image_landscape: - /* Go ahead and paint the chunk if we have 16 values or a partial - to get us in sync with the 1 bit devices 16 bit positions */ - while (penum->ht_landscape.count > 15 || - ((penum->ht_landscape.count >= offset_bits) && - penum->ht_landscape.offset_set)) { - /* Go ahead and 2D tile in the threshold buffer at this time */ - /* Always work the tiling from the upper left corner of our - 16 columns */ - if (penum->ht_landscape.offset_set) { - width = offset_bits; - } else { - width = 16; - } - if (penum->y_extent.x < 0) { - dx = (penum->ht_landscape.xstart - width + 1) % thresh_width; - } else { - dx = penum->ht_landscape.xstart % thresh_width; - } - dy = (penum->dev->band_offset_y + penum->ht_landscape.y_pos) % thresh_height; - if (dy < 0) - dy += thresh_height; - /* Left remainder part */ - left_rem_end = min(dx + 16, thresh_width); - left_width = left_rem_end - dx; - /* Now the middle part */ - num_full_tiles = - (int)fastfloor((float) (16 - left_width)/ (float) thresh_width); - /* Now the right part */ - right_tile_width = - 16 - num_full_tiles * thresh_width - left_width; - /* Now loop over the y stuff */ - ptr_out = thresh_align; - /* Do this in three 
parts. We do a top part, followed by - larger mem copies followed by a bottom partial. After - a slower initial fill we are able to do larger faster - expansions */ - if (dest_height <= 2 * thresh_height) { - init_tile = dest_height; - replicate_tile = false; - } else { - init_tile = thresh_height; - replicate_tile = true; - } - for (jj = 0; jj < init_tile; jj++) { - in_row_offset = (jj + dy) % thresh_height; - row_ptr = threshold + in_row_offset * thresh_width; - ptr_out_temp = ptr_out; - /* Left part */ - memcpy(ptr_out_temp, row_ptr + dx, left_width); - ptr_out_temp += left_width; - /* Now the full tiles */ - for (ii = 0; ii < num_full_tiles; ii++) { - memcpy(ptr_out_temp, row_ptr, thresh_width); - ptr_out_temp += thresh_width; - } - /* Now the remainder */ - memcpy(ptr_out_temp, row_ptr, right_tile_width); -#ifdef PACIFY_VALGRIND - ptr_out_temp += right_tile_width; - if (ptr_out_temp < ptr_out + 16) - memset(ptr_out_temp, 0, ptr_out + 16 - ptr_out_temp); -#endif - ptr_out += 16; - } - if (replicate_tile) { - /* Find out how many we need to copy */ - num_tiles = - (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height); - tile_remainder = dest_height - (num_tiles + 1) * thresh_height; - for (jj = 0; jj < num_tiles; jj ++) { - memcpy(ptr_out, thresh_align, 16 * thresh_height); - ptr_out += 16 * thresh_height; - } - /* Now fill in the remainder */ - memcpy(ptr_out, thresh_align, 16 * tile_remainder); - } - /* Apply the threshold operation */ - gx_ht_threshold_landscape(contone_align, thresh_align, - penum->ht_landscape, halftone, data_length); - /* Perform the copy mono */ - penum->ht_landscape.offset_set = false; - if (penum->ht_landscape.index < 0) { - (*dev_proc(dev, copy_mono)) (dev, halftone, 0, 2, - gx_no_bitmap_id, - penum->ht_landscape.xstart - width + 1, - penum->ht_landscape.y_pos, - width, data_length, - (gx_color_index) 0, - (gx_color_index) 1); - } else { - (*dev_proc(dev, copy_mono)) (dev, halftone, 0, 2, - gx_no_bitmap_id, - 
penum->ht_landscape.xstart, - penum->ht_landscape.y_pos, - width, data_length, - (gx_color_index) 0, - (gx_color_index) 1); - } - /* Clean up and reset our buffer. We may have a line left - over that has to be maintained due to line replication in the - resolution conversion */ - if (width != penum->ht_landscape.count) { - reset_landscape_buffer(&(penum->ht_landscape), contone_align, - data_length, width); - } else { - /* Reset the whole buffer */ - penum->ht_landscape.count = 0; - if (penum->ht_landscape.index < 0) { - /* Going right to left */ - penum->ht_landscape.curr_pos = 15; - } else { - /* Going left to right */ - penum->ht_landscape.curr_pos = 0; - } - penum->ht_landscape.num_contones = 0; - memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*16); - } - } - break; - default: - return gs_rethrow(-1, "Invalid orientation for thresholding"); - } - - /* Clean up. Only for debug case */ -#if RAW_HT_DUMP - gs_free_object(penum->memory, contone_align, "image_render_mono_ht"); - gs_free_object(penum->memory, thresh_align, "image_render_mono_ht"); - gs_free_object(penum->memory, halftone, "image_render_mono_ht"); -#endif - return 0; + code = gxht_thresh_plane(penum, d_order, xrun, dest_width, dest_height, + thresh_align, contone_align, contone_stride, dev); + return code; } - - diff --git a/gs/base/lib.mak b/gs/base/lib.mak index 1267e2f64..2470f2910 100644 --- a/gs/base/lib.mak +++ b/gs/base/lib.mak @@ -2344,7 +2344,7 @@ $(GLOBJ)gxicolor.$(OBJ) : $(GLSRC)gxicolor.c $(GXERR) $(memory__h) $(gpcheck_h)\ $(gxdevice_h) $(gxcmap_h) $(gxdcconv_h) $(gxdcolor_h)\ $(gxistate_h) $(gxdevmem_h) $(gxcpath_h) $(gximage_h)\ $(gsicc_h) $(gsicc_cache_h) $(gsicc_cms_h) $(gxcie_h)\ - $(gscie_h) $(gxht_thresh_h) + $(gscie_h) $(gzht_h) $(gxht_thresh_h) $(GLCC) $(GLO_)gxicolor.$(OBJ) $(C_) $(GLSRC)gxicolor.c # ---- Level 1 path miscellany (arcs, pathbbox, path enumeration) ---- # |