summaryrefslogtreecommitdiff
path: root/gs/base/gdevpdfi.c
diff options
context:
space:
mode:
authorKen Sharp <ken.sharp@artifex.com>2010-11-16 14:33:57 +0000
committerKen Sharp <ken.sharp@artifex.com>2010-11-16 14:33:57 +0000
commitbd8bfb08e4aff9608240ee82ecb83c925437c960 (patch)
tree04f617a38e671225cb34be1138bebea8dfc2b129 /gs/base/gdevpdfi.c
parentd5fdb08915a7e03e3754a8235fd44c7f473b8f42 (diff)
downloadghostpdl-bd8bfb08e4aff9608240ee82ecb83c925437c960.tar.gz
pdfwrite enhancement : attempt to make PCL bitmap fonts into searchable type 3
In general, pdfwrite only resorts to making a bitmap from a font when it cannot handle the original font type, which is rare for PostScript, PDF and XPS. However, all PCL bitmap fonts are handled this way. When this happens, the bitmap is stored into a general type 3 font, a 'bucket' where all such glyphs are stored. When this font is full, a new one is started, and so on. The text stored in the PDF page stream references the correct type 3 font, but usually the character code will be unrelated to the original character code. For PCL bitmap fonts, pdfwrite actually starts by creating a type 3 font to hold the PCL bitmaps, but doesn't use it. This patch tries to store the bitmaps in the type 3 font where possible, using the character code from the original PCL document. Although this will not create searchable text in the general case, it does seem that there are a good number of PCL documents which do use an ASCII encoding and so will produce a searchable PDF file. There are 3 parts to this enhancement: 1) Cached glyphs. When the current font is a type 3 font, and the text operation is one which might result in an ASCII character code, and we can manufacture a glyph name for the resulting character code, store the glyph in the type 3 font (rather than the general 'bucket' font), using the character code and glyph name. Glyphs which can't be handled this way for any reason are still stored in the general recipient 'bucket' font. 2) Uncached glyphs. Glyphs which are too large for the cache are rendered as images. The image handling code has been extensively reworked to try and detect this situation and, if the criteria for cached glyphs above also hold true, to store the image as a glyph in a type 3 font and draw text in the PDF content stream instead of an image. Images which do not fulfil these criteria are still handled as images. 3) Recached glyphs. If the glyph cache fills up, glyphs will be flushed to make space. 
If a glyph is then reused, we go through the caching case again (for large glyphs which are uncached we end up repeating the code every time the glyph is used). We now attempt to spot this by determining that the glyph in the font has already been used, and rather than storing a new copy of the glyph, as the old code did, we simply emit text into the page content stream. Note that there is a recommendation that inline images in PDF should not exceed 4KB. Since CharProcs must use inline images, bitmaps which exceed this size will be rendered as images, not text (they will also exceed the cache size and so are always rendered uncached). Expected Differences: A number of PCL files exhibit small differences at low resolution (75 dpi). These are either one-pixel shifts in size or position, due to the old code rendering an image with a single matrix and the new code rendering text using two matrices with the attendant loss of precision, or an 'emboldening' effect which seems to be due to the rendering code treating a bitmap in a glyph differently from an image. git-svn-id: http://svn.ghostscript.com/ghostscript/trunk@11901 a1074d23-0009-0410-80fe-cf8c14f379e6
Diffstat (limited to 'gs/base/gdevpdfi.c')
-rw-r--r--gs/base/gdevpdfi.c236
1 files changed, 200 insertions, 36 deletions
diff --git a/gs/base/gdevpdfi.c b/gs/base/gdevpdfi.c
index c79d11091..89dc0f776 100644
--- a/gs/base/gdevpdfi.c
+++ b/gs/base/gdevpdfi.c
@@ -32,7 +32,14 @@
#include "gxpcolor.h"
#include "gxcolor2.h"
#include "gxhldevc.h"
+#include "gzstate.h" /* for gs_state */
+#include "gxchar.h" /* for gs_show_enum */
+#include "gdevpdtx.h"
+#include "gdevpdtf.h" /* for pdfont->FontType */
+#include "gdevpdts.h"
+#include "gdevpdtt.h"
+#include "gdevpdti.h"
/* Forward references */
static image_enum_proc_plane_data(pdf_image_plane_data);
@@ -69,10 +76,14 @@ static const gx_image_enum_procs_t pdf_image_cvd_enum_procs = {
typedef struct pdf_image_enum_s {
gx_image_enum_common;
int width;
+ int height;
int bits_per_pixel; /* bits per pixel (per plane) */
int rows_left;
pdf_image_writer writer;
gs_matrix mat;
+ int IsCharProc; /* Used if this image results from a rendered glyph */
+ pdf_char_proc_t *pcp; /* too large to be cached. */
+ pdf_stream_position_t ipos; /* ditto */
} pdf_image_enum;
gs_private_st_composite(st_pdf_image_enum, pdf_image_enum, "pdf_image_enum",
pdf_image_enum_enum_ptrs, pdf_image_enum_reloc_ptrs);
@@ -336,6 +347,7 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
gs_color_space *pcs_device = NULL;
gs_color_space *pcs_orig = NULL;
pdf_lcvd_t *cvd = NULL;
+ gs_state *pgs = (gs_state *)gx_hld_get_gstate_ptr(pis);
/*
* Pop the image name from the NI stack. We must do this, to keep the
@@ -548,26 +560,6 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
pcs = pim->ColorSpace;
num_components = (is_mask ? 1 : gs_color_space_num_components(pcs));
- if (pdf_must_put_clip_path(pdev, pcpath))
- code = pdf_unclip(pdev);
- else
- code = pdf_open_page(pdev, PDF_IN_STREAM);
- if (code < 0)
- return code;
- if (context == PDF_IMAGE_TYPE3_MASK) {
- /*
- * The soft mask for an ImageType 3x image uses a DevicePixel
- * color space, which pdf_color_space() can't handle. Patch it
- * to DeviceGray here.
- */
- /* {csrc} make sure this gets freed */
- pcs = gs_cspace_new_DeviceGray(pdev->memory);
- } else if (is_mask)
- code = pdf_prepare_imagemask(pdev, pis, pdcolor);
- else
- code = pdf_prepare_image(pdev, pis);
- if (code < 0)
- goto nyi;
if (prect)
rect = *prect;
else {
@@ -595,6 +587,7 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
width = rect.q.x - rect.p.x;
pie->width = width;
height = rect.q.y - rect.p.y;
+ pie->height = height;
pie->bits_per_pixel =
pim->BitsPerComponent * num_components / pie->num_planes;
pie->rows_left = height;
@@ -606,14 +599,6 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
in_line &= (nbytes < pdev->MaxInlineImageSize);
}
- if (rect.p.x != 0 || rect.p.y != 0 ||
- rect.q.x != pim->Width || rect.q.y != pim->Height ||
- (is_mask && pim->CombineWithColor)
- /* Color space setup used to be done here: see SRZB comment below. */
- ) {
- gs_free_object(mem, pie, "pdf_begin_image");
- goto nyi;
- }
if (pmat == 0)
pmat = &ctm_only(pis);
{
@@ -636,9 +621,129 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
goto nyi;
}
}
- code = pdf_put_clip_path(pdev, pcpath);
- if (code < 0)
- return code;
+ /* This block tests to see if the image we are getting might be the result of
+ * rendering a glyph to a bitmap. If it is, and the current font is a type
+ * 3 font, then it would be better to store the bitmap as a bitmap in the
+ * type 3 font. However, PDF recommends a maximum size of 4Kb for inline
+ * images, and the image *must* be inline in a CharProc.
+ */
+ pie->IsCharProc = 0;
+ /* Is our current gstate a 'show' ? */
+ if (pgs && pgs->show_gstate) {
+ gs_show_enum *show_enum = (gs_show_enum *)pdev->pte;
+ /* Is the current font a type 3, and is the bitmap small enough ? */
+ if (show_enum && show_enum->current_font->FontType == ft_user_defined
+ && ((pie->width * pie->height) / 8) <= 4096) {
+ gs_matrix mat;
+ double w;
+
+ gs_matrix_multiply (pmat, &pim->ImageMatrix, &mat);
+ mat.tx = pie->mat.tx;
+ mat.ty = pie->mat.ty;
+ pie->mat = mat;
+ pie->IsCharProc = 1;
+
+ if (show_enum->use_wxy_float)
+ pdev->char_width.x = show_enum->wxy_float.x;
+ else
+ pdev->char_width.x = fixed2float(show_enum->wxy.x);
+ if (pdev->char_width.x == 0)
+ pdev->char_width.x = width;
+
+ if (pdf_must_put_clip_path(pdev, pcpath))
+ code = pdf_unclip(pdev);
+ else
+ code = pdf_open_page(pdev, PDF_IN_STREAM);
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ return code;
+ }
+#if 0
+ if (context == PDF_IMAGE_TYPE3_MASK) {
+ /*
+ * The soft mask for an ImageType 3x image uses a DevicePixel
+ * color space, which pdf_color_space() can't handle. Patch it
+ * to DeviceGray here.
+ */
+ /* {csrc} make sure this gets freed */
+ pcs = gs_cspace_new_DeviceGray(pdev->memory);
+ } else if (is_mask)
+#endif
+ code = pdf_prepare_imagemask(pdev, pis, pdcolor);
+#if 0
+ else
+ code = pdf_prepare_image(pdev, pis);
+#endif
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ goto nyi;
+ }
+ code = pdf_put_clip_path(pdev, pcpath);
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ return code;
+ }
+ code = pdf_begin_char_proc(pdev, pie->width, pie->height, 0, 0, 0, 0,
+ &pie->pcp, &pie->ipos);
+ if (code < 0)
+ return code;
+ pie->skipping = code;
+ if (!pie->skipping) {
+ w = psdf_round(pdev->char_width.x, 100, 10); /* See
+ pdf_write_Widths about rounding. We need to provide
+ a compatible data for Tj. */
+ pprintg1(pdev->strm, "%g ", w);
+ pprintd4(pdev->strm, "0 %d %d %d %d d1\n", 0, pie->height, pie->width, 0);
+ pprintd4(pdev->strm, "%d 0 0 %d %d %d cm\n", pie->width, -pie->height, 0,
+ pie->height);
+ in_line = 1;
+ }
+ else
+ return 0;
+ }
+ }
+ /* If we don't have a bitmap glyph in a type 3 font, then carry on
+ * doing setup for an image (glyph setup is handled above)
+ */
+ if (pie->IsCharProc == 0) {
+ if (pdf_must_put_clip_path(pdev, pcpath))
+ code = pdf_unclip(pdev);
+ else
+ code = pdf_open_page(pdev, PDF_IN_STREAM);
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ return code;
+ }
+ if (context == PDF_IMAGE_TYPE3_MASK) {
+ /*
+ * The soft mask for an ImageType 3x image uses a DevicePixel
+ * color space, which pdf_color_space() can't handle. Patch it
+ * to DeviceGray here.
+ */
+ /* {csrc} make sure this gets freed */
+ pcs = gs_cspace_new_DeviceGray(pdev->memory);
+ } else if (is_mask)
+ code = pdf_prepare_imagemask(pdev, pis, pdcolor);
+ else
+ code = pdf_prepare_image(pdev, pis);
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ goto nyi;
+ }
+ code = pdf_put_clip_path(pdev, pcpath);
+ if (code < 0) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ return code;
+ }
+ }
+ if (rect.p.x != 0 || rect.p.y != 0 ||
+ rect.q.x != pim->Width || rect.q.y != pim->Height ||
+ (is_mask && pim->CombineWithColor)
+ /* Color space setup used to be done here: see SRZB comment below. */
+ ) {
+ gs_free_object(mem, pie, "pdf_begin_image");
+ goto nyi;
+ }
pdf_image_writer_init(&pie->writer);
pie->writer.alt_writer_count = (in_line ||
(pim->Width <= 64 && pim->Height <= 64) ||
@@ -727,7 +832,22 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
* this piece of code.
*/
rc_increment_cs(image[0].pixel.ColorSpace);
- if ((pdf_begin_write_image(pdev, &pie->writer, gs_no_id, width,
+
+ /* Special setup for images which are actually glyphs and are being
+ * stored in a type 3 font (specific image compression type).
+ */
+ if (pie->IsCharProc ){
+ code = pdf_begin_write_image(pdev, &pie->writer, gs_no_id, width,
+ height, pnamed, true);
+ pie->writer.end_string = "";
+ if (code < 0) {
+ if (image[0].pixel.ColorSpace == pim->ColorSpace)
+ rc_decrement_only_cs(pim->ColorSpace, "psdf_setup_image_filters");
+ goto fail;
+ }
+ psdf_CFE_binary(&pie->writer.binary[0], pie->width, pie->height, true);
+ } else {
+ if ((pdf_begin_write_image(pdev, &pie->writer, gs_no_id, width,
height, pnamed, in_line)) < 0 ||
/*
* Some regrettable PostScript code (such as LanguageLevel 1 output
@@ -746,9 +866,10 @@ pdf_begin_typed_image_impl(gx_device_pdf *pdev, const gs_imager_state * pis,
&pie->writer.binary[0], &image[0].pixel,
pmat, pis, true, in_line))) < 0
) {
- if (image[0].pixel.ColorSpace == pim->ColorSpace)
- rc_decrement_only_cs(pim->ColorSpace, "psdf_setup_image_filters");
- goto fail;
+ if (image[0].pixel.ColorSpace == pim->ColorSpace)
+ rc_decrement_only_cs(pim->ColorSpace, "psdf_setup_image_filters");
+ goto fail;
+ }
}
if (image[0].pixel.ColorSpace == pim->ColorSpace)
rc_decrement_only_cs(pim->ColorSpace, "psdf_setup_image_filters");
@@ -974,6 +1095,15 @@ pdf_image_plane_data(gx_image_enum_common_t * info,
{
pdf_image_enum *pie = (pdf_image_enum *) info;
int i;
+
+ /* If a glyph was flushed from the cache and later reused then it gets
+ * rendered again, but we don't want a duplicate, so we use the existing
+ * one. In that case we don't want to write any of the bitmap data, so
+ * we just jump out here without writing the image data.
+ */
+ if (pie->IsCharProc && pie->skipping)
+ return 1;
+
for (i = 0; i < pie->writer.alt_writer_count; i++) {
int code = pdf_image_plane_data_alt(info, planes, height, rows_used, i);
if (code)
@@ -1112,7 +1242,8 @@ pdf_image_end_image_data(gx_image_enum_common_t * info, bool draw_last,
if (pie->writer.pres)
((pdf_x_object_t *)pie->writer.pres)->data_height = data_height;
- else if (data_height > 0)
+ else if (data_height > 0 && ! pie->IsCharProc)
+ /* If this is a type 3 CharProc the matrix is already handled, don't do it here */
pdf_put_image_matrix(pdev, &pie->mat, (double)data_height / height);
if (data_height > 0) {
code = pdf_complete_image_data(pdev, &pie->writer, data_height,
@@ -1145,6 +1276,39 @@ pdf_image_end_image_data(gx_image_enum_common_t * info, bool draw_last,
code = pdf_end_and_do_image(pdev, &pie->writer, &pie->mat, info->id, do_image);
pie->writer.alt_writer_count--; /* For GC. */
}
+
+ /* This code handles the case where a bitmap glyph is rendered uncached
+ * because it's too large for the cache. We've finished the image, so
+ * now we need to close the CharProc (if required) and actually put the
+ * text in the PDF page stream.
+ */
+ if (pie->IsCharProc)
+ {
+ gs_matrix imat;
+ gs_point point, save;
+
+ /* skipping is set if we already had a copy of the glyph. In this case
+ * we didn't make a new CharProc, so we don't need to close it. We
+ * simply need to set up the text matrix and show the character code
+ */
+ if (!pie->skipping) {
+ code = pdf_end_char_proc(pdev, &pie->ipos);
+ if (code < 0)
+ return code;
+ }
+
+ /* This sets up the text matrix for the character */
+ save.x = pie->mat.tx;
+ save.y = pie->mat.ty;
+ gs_make_identity(&imat);
+ gs_matrix_multiply(&imat, &pie->mat, &imat);
+ gs_distance_transform(0, pie->height, &imat, &point);
+ imat.tx = save.x - point.x;
+ imat.ty = save.y - point.y;// + pie->height;
+
+ /* And here we emit it into the page content stream */
+ code = pdf_do_char_image(pdev, pie->pcp, &imat, pie->skipping);
+ }
gx_image_free_enum(&info);
return code;
}