summary | refs | log | tree | commit | diff
path: root/pdf
diff options
context:
space:
mode:
author Chris Liddell <chris.liddell@artifex.com> 2023-02-20 13:44:31 +0000
committer Chris Liddell <chris.liddell@artifex.com> 2023-02-23 16:34:59 +0000
commit ec203362d93fd7d61ca2775aaac17b23c1cca6c8 (patch)
tree ef84965ef63389f4a95ace1a77845b77ab6be13e /pdf
parent 42a4ff9ac99e365734a35a90beaa114ee2e81f39 (diff)
download ghostpdl-ec203362d93fd7d61ca2775aaac17b23c1cca6c8.tar.gz
Bug 706257: CIDFont glyph ordering issues with pdfi/pdfwrite
This exposed a couple of issues: Firstly, and most importantly, when pdfwrite uses the callback to retrieve the glyph index for text in a CIDFont, it uses the descendant font rather than the Type 0 font, contrary to what I originally thought. For embedded CIDFonts, that didn't cause a problem, but for substituted CIDFonts it meant the glyph decoding callback did not have access to the decoding table. Secondly, fixing that exposed some byte ordering issues, where Unicode codes read from the ToUnicode CMap differed in byte order from codes read from the decoding table.
Diffstat (limited to 'pdf')
-rw-r--r-- pdf/pdf_fapi.c 6
-rw-r--r-- pdf/pdf_font.c 149
-rw-r--r-- pdf/pdf_font.h 8
-rw-r--r-- pdf/pdf_font0.c 52
-rw-r--r-- pdf/pdf_font11.c 57
-rw-r--r-- pdf/pdf_font1C.c 2
-rw-r--r-- pdf/pdf_font_types.h 6
7 files changed, 206 insertions, 74 deletions
diff --git a/pdf/pdf_fapi.c b/pdf/pdf_fapi.c
index 7fb7c954c..2939fd039 100644
--- a/pdf/pdf_fapi.c
+++ b/pdf/pdf_fapi.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019-2022 Artifex Software, Inc.
+/* Copyright (C) 2019-2023 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
@@ -875,10 +875,10 @@ pdfi_fapi_get_glyphname_or_cid(gs_text_enum_t *penum, gs_font_base * pbfont, gs_
l = penum->orig_font->procs.decode_glyph((gs_font *)penum->orig_font, ccode, (gs_char)cc, (ushort *)uc, 4);
if (l == 2) {
- cc = uc[1] | uc[0] << 8;
+ cc = uc[0] | uc[1] << 8;
}
else if (l == 4) {
- cc = uc[3] | uc[2] << 8 | uc[2] << 16 | uc[2] << 24;
+ cc = uc[0] | uc[1] << 8 | uc[2] << 16 | uc[3] << 24;
}
else
cc = ccode;
diff --git a/pdf/pdf_font.c b/pdf/pdf_font.c
index b6409d483..7f760bfa2 100644
--- a/pdf/pdf_font.c
+++ b/pdf/pdf_font.c
@@ -1002,13 +1002,10 @@ int pdfi_load_font(pdf_context *ctx, pdf_dict *stream_dict, pdf_dict *page_dict,
code = gs_note_error(gs_error_invalidfont);
}
else {
- if (cidfont) {
- ((pdf_cidfont_t *)ppdffont)->substitute = (substitute != font_embedded);
- }
- else {
- if ((substitute & font_substitute) == font_substitute)
- code = pdfi_font_match_glyph_widths(ppdffont);
- }
+ ppdffont->substitute = (substitute != font_embedded);
+
+ if ((substitute & font_substitute) == font_substitute)
+ code = pdfi_font_match_glyph_widths(ppdffont);
*ppfont = (gs_font *)ppdffont->pfont;
}
@@ -1707,6 +1704,35 @@ gs_glyph pdfi_encode_char(gs_font * pfont, gs_char chr, gs_glyph_space_t not_use
return g;
}
+extern const pdfi_cid_decoding_t *pdfi_cid_decoding_list[];
+extern const pdfi_cid_subst_nwp_table_t *pdfi_cid_substnwp_list[];
+
+void pdfi_cidfont_cid_subst_tables(const char *reg, const int reglen, const char *ord,
+ const int ordlen, pdfi_cid_decoding_t **decoding, pdfi_cid_subst_nwp_table_t **substnwp)
+{
+ int i;
+ *decoding = NULL;
+ *substnwp = NULL;
+ /* This only makes sense for Adobe orderings */
+ if (reglen == 5 && !memcmp(reg, "Adobe", 5)) {
+ for (i = 0; pdfi_cid_decoding_list[i] != NULL; i++) {
+ if (strlen(pdfi_cid_decoding_list[i]->s_order) == ordlen &&
+ !memcmp(pdfi_cid_decoding_list[i]->s_order, ord, ordlen)) {
+ *decoding = (pdfi_cid_decoding_t *)pdfi_cid_decoding_list[i];
+ break;
+ }
+ }
+ /* For now, also only for Adobe orderings */
+ for (i = 0; pdfi_cid_substnwp_list[i] != NULL; i++) {
+ if (strlen(pdfi_cid_substnwp_list[i]->ordering) == ordlen &&
+ !memcmp(pdfi_cid_substnwp_list[i]->ordering, ord, ordlen)) {
+ *substnwp = (pdfi_cid_subst_nwp_table_t *)pdfi_cid_substnwp_list[i];
+ break;
+ }
+ }
+ }
+}
+
int pdfi_tounicode_char_to_unicode(pdf_context *ctx, pdf_cmap *tounicode, gs_glyph glyph, int ch, ushort *unicode_return, unsigned int length)
{
int i, l = 0;
@@ -1728,33 +1754,33 @@ int pdfi_tounicode_char_to_unicode(pdf_context *ctx, pdf_cmap *tounicode, gs_gly
if (counter.entry.value.size == 1) {
l = 2;
if (ucode != NULL && length >= l) {
- ucode[0] = counter.entry.value.data[0];
- ucode[1] = counter.entry.value.data[1];
+ ucode[1] = counter.entry.value.data[0];
+ ucode[0] = counter.entry.value.data[1];
}
}
else if (counter.entry.value.size == 2) {
l = 2;
if (ucode != NULL && length >= l) {
- ucode[0] = counter.entry.value.data[0];
- ucode[1] = counter.entry.value.data[1];
+ ucode[1] = counter.entry.value.data[0];
+ ucode[0] = counter.entry.value.data[1];
}
}
else if (counter.entry.value.size == 3) {
l = 4;
if (ucode != NULL && length >= l) {
- ucode[0] = counter.entry.value.data[0];
- ucode[1] = counter.entry.value.data[1];
- ucode[2] = counter.entry.value.data[2];
- ucode[3] = 0;
+ ucode[3] = counter.entry.value.data[0];
+ ucode[2] = counter.entry.value.data[1];
+ ucode[1] = counter.entry.value.data[2];
+ ucode[0] = 0;
}
}
else {
l = 4;
if (ucode != NULL && length >= l) {
- ucode[0] = counter.entry.value.data[0];
- ucode[1] = counter.entry.value.data[1];
+ ucode[3] = counter.entry.value.data[0];
ucode[2] = counter.entry.value.data[1];
- ucode[3] = counter.entry.value.data[3];
+ ucode[1] = counter.entry.value.data[1];
+ ucode[0] = counter.entry.value.data[3];
}
}
}
@@ -1768,6 +1794,93 @@ int pdfi_tounicode_char_to_unicode(pdf_context *ctx, pdf_cmap *tounicode, gs_gly
return code;
}
+int
+pdfi_cidfont_decode_glyph(gs_font *font, gs_glyph glyph, int ch, ushort *u, unsigned int length)
+{
+ gs_glyph cc = glyph < GS_MIN_CID_GLYPH ? glyph : glyph - GS_MIN_CID_GLYPH;
+ pdf_cidfont_t *pcidfont = (pdf_cidfont_t *)font->client_data;
+ int code = gs_error_undefined, i;
+ uchar *unicode_return = (uchar *)u;
+ pdfi_cid_subst_nwp_table_t *substnwp = pcidfont->substnwp;
+
+ code = gs_error_undefined;
+ while (1) { /* Loop to make retrying with a substitute CID easier */
+ /* Favour the ToUnicode if one exists */
+ code = pdfi_tounicode_char_to_unicode(pcidfont->ctx, (pdf_cmap *)pcidfont->ToUnicode, glyph, ch, u, length);
+
+ if (code == gs_error_undefined && pcidfont->decoding) {
+ const int *n;
+
+ if (cc / 256 < pcidfont->decoding->nranges) {
+ n = (const int *)pcidfont->decoding->ranges[cc / 256][cc % 256];
+ for (i = 0; i < pcidfont->decoding->val_sizes; i++) {
+ unsigned int cmapcc;
+ if (n[i] == -1)
+ break;
+ cc = n[i];
+ cmapcc = (unsigned int)cc;
+ if (pcidfont->pdfi_font_type == e_pdf_cidfont_type2)
+ code = pdfi_fapi_check_cmap_for_GID((gs_font *)pcidfont->pfont, (unsigned int)cc, &cmapcc);
+ else
+ code = 0;
+ if (code >= 0 && cmapcc != 0){
+ code = 0;
+ break;
+ }
+ }
+ /* If it's a TTF derived CIDFont, we prefer a code point supported by the cmap table
+ but if not, use the first available one
+ */
+ if (code < 0 && n[0] != -1) {
+ cc = n[0];
+ code = 0;
+ }
+ }
+ if (code >= 0) {
+ if (cc > 65535) {
+ code = 4;
+ if (unicode_return != NULL && length >= code) {
+ unicode_return[3] = (cc & 0xFF000000)>> 24;
+ unicode_return[2] = (cc & 0x00FF0000) >> 16;
+ unicode_return[1] = (cc & 0x0000FF00) >> 8;
+ unicode_return[0] = (cc & 0x000000FF);
+ }
+ }
+ else {
+ code = 2;
+ if (unicode_return != NULL && length >= code) {
+ unicode_return[1] = (cc & 0x0000FF00) >> 8;
+ unicode_return[0] = (cc & 0x000000FF);
+ }
+ }
+ }
+ }
+ /* If we get here, and still don't have a usable code point, check for a
+ pre-defined CID substitution, and if there's one, jump back to the start
+ and try again.
+ */
+ if (code == gs_error_undefined && substnwp) {
+ for (i = 0; substnwp->subst[i].s_type != 0; i++ ) {
+ if (cc >= substnwp->subst[i].s_scid && cc <= substnwp->subst[i].e_scid) {
+ cc = substnwp->subst[i].s_dcid + (cc - substnwp->subst[i].s_scid);
+ substnwp = NULL;
+ break;
+ }
+ if (cc >= substnwp->subst[i].s_dcid
+ && cc <= substnwp->subst[i].s_dcid + (substnwp->subst[i].e_scid - substnwp->subst[i].s_scid)) {
+ cc = substnwp->subst[i].s_scid + (cc - substnwp->subst[i].s_dcid);
+ substnwp = NULL;
+ break;
+ }
+ }
+ if (substnwp == NULL)
+ continue;
+ }
+ break;
+ }
+ return (code < 0 ? 0 : code);
+}
+
/* Get the unicode valude for a glyph FIXME - not written yet
*/
int pdfi_decode_glyph(gs_font * font, gs_glyph glyph, int ch, ushort *unicode_return, unsigned int length)
diff --git a/pdf/pdf_font.h b/pdf/pdf_font.h
index a18541c6f..22fb426e7 100644
--- a/pdf/pdf_font.h
+++ b/pdf/pdf_font.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2018-2022 Artifex Software, Inc.
+/* Copyright (C) 2018-2023 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
@@ -62,6 +62,12 @@ int pdfi_create_Encoding(pdf_context *ctx, pdf_obj *pdf_Encoding, pdf_obj *font_
gs_glyph pdfi_encode_char(gs_font * pfont, gs_char chr, gs_glyph_space_t not_used);
int pdfi_glyph_index(gs_font *pfont, byte *str, uint size, uint *glyph);
int pdfi_glyph_name(gs_font * pfont, gs_glyph glyph, gs_const_string * pstr);
+
+void pdfi_cidfont_cid_subst_tables(const char *reg, const int reglen, const char *ord,
+ const int ordlen, pdfi_cid_decoding_t **decoding, pdfi_cid_subst_nwp_table_t **substnwp);
+
+int pdfi_cidfont_decode_glyph(gs_font *font, gs_glyph glyph, int ch, ushort *u, unsigned int length);
+
int pdfi_tounicode_char_to_unicode(pdf_context *ctx, pdf_cmap *tounicode, gs_glyph glyph, int ch, ushort *unicode_return, unsigned int length);
int pdfi_decode_glyph(gs_font * font, gs_glyph glyph, int ch, ushort *unicode_return, unsigned int length);
diff --git a/pdf/pdf_font0.c b/pdf/pdf_font0.c
index 51cf4989a..594a79a40 100644
--- a/pdf/pdf_font0.c
+++ b/pdf/pdf_font0.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019-2022 Artifex Software, Inc.
+/* Copyright (C) 2019-2023 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
@@ -34,35 +34,6 @@
#include "gsutil.h" /* For gs_next_ids() */
-extern const pdfi_cid_decoding_t *pdfi_cid_decoding_list[];
-extern const pdfi_cid_subst_nwp_table_t *pdfi_cid_substnwp_list[];
-
-static void pdfi_font0_cid_subst_tables(const char *reg, const int reglen, const char *ord,
- const int ordlen, pdfi_cid_decoding_t **decoding, pdfi_cid_subst_nwp_table_t **substnwp)
-{
- int i;
- *decoding = NULL;
- *substnwp = NULL;
- /* This only makes sense for Adobe orderings */
- if (reglen == 5 && !memcmp(reg, "Adobe", 5)) {
- for (i = 0; pdfi_cid_decoding_list[i] != NULL; i++) {
- if (strlen(pdfi_cid_decoding_list[i]->s_order) == ordlen &&
- !memcmp(pdfi_cid_decoding_list[i]->s_order, ord, ordlen)) {
- *decoding = (pdfi_cid_decoding_t *)pdfi_cid_decoding_list[i];
- break;
- }
- }
- /* For now, also only for Adobe orderings */
- for (i = 0; pdfi_cid_substnwp_list[i] != NULL; i++) {
- if (strlen(pdfi_cid_substnwp_list[i]->ordering) == ordlen &&
- !memcmp(pdfi_cid_substnwp_list[i]->ordering, ord, ordlen)) {
- *substnwp = (pdfi_cid_subst_nwp_table_t *)pdfi_cid_substnwp_list[i];
- break;
- }
- }
- }
-}
-
static int
pdfi_font0_glyph_name(gs_font *pfont, gs_glyph index, gs_const_string *pstr)
{
@@ -134,17 +105,17 @@ pdfi_font0_map_glyph_to_unicode(gs_font *font, gs_glyph glyph, int ch, ushort *u
if (cc > 65535) {
code = 4;
if (unicode_return != NULL && length >= code) {
- unicode_return[0] = (cc & 0xFF000000)>> 24;
- unicode_return[1] = (cc & 0x00FF0000) >> 16;
- unicode_return[2] = (cc & 0x0000FF00) >> 8;
- unicode_return[3] = (cc & 0x000000FF);
+ unicode_return[3] = (cc & 0xFF000000)>> 24;
+ unicode_return[2] = (cc & 0x00FF0000) >> 16;
+ unicode_return[1] = (cc & 0x0000FF00) >> 8;
+ unicode_return[0] = (cc & 0x000000FF);
}
}
else {
code = 2;
if (unicode_return != NULL && length >= code) {
- unicode_return[0] = (cc & 0x0000FF00) >> 8;
- unicode_return[1] = (cc & 0x000000FF);
+ unicode_return[1] = (cc & 0x0000FF00) >> 8;
+ unicode_return[0] = (cc & 0x000000FF);
}
}
}
@@ -335,11 +306,10 @@ int pdfi_read_type0_font(pdf_context *ctx, pdf_dict *font_dict, pdf_dict *stream
olen = pcmap->csi_ord.size;
}
if (rlen > 0 && olen > 0)
- pdfi_font0_cid_subst_tables(r, rlen, o, olen, &dec, &substnwp);
- else {
- dec = NULL;
- substnwp = NULL;
- }
+ pdfi_cidfont_cid_subst_tables(r, rlen, o, olen, &dec, &substnwp);
+
+ ((pdf_cidfont_t *)descpfont)->decoding = dec;
+ ((pdf_cidfont_t *)descpfont)->substnwp = substnwp;
}
/* reference is now owned by the descendent font created above */
pdfi_countdown(decfontdict);
diff --git a/pdf/pdf_font11.c b/pdf/pdf_font11.c
index c57833acf..7a05d0855 100644
--- a/pdf/pdf_font11.c
+++ b/pdf/pdf_font11.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2020-2022 Artifex Software, Inc.
+/* Copyright (C) 2020-2023 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
@@ -50,9 +50,30 @@ static int pdfi_cidtype2_CIDMap_proc(gs_font_cid2 *pfont, gs_glyph glyph)
{
pdf_cidfont_type2 *pdffont11 = (pdf_cidfont_type2 *)pfont->client_data;
uint gid = glyph - GS_MIN_CID_GLYPH;
+ int code = 0;
- if (pdffont11->cidtogidmap != NULL && pdffont11->cidtogidmap->length > (gid << 1) + 1) {
- gid = pdffont11->cidtogidmap->data[gid << 1] << 8 | pdffont11->cidtogidmap->data[(gid << 1) + 1];
+ if (pdffont11->substitute == true) {
+ unsigned int ucc = 0;
+ int code = pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, NULL, 0);
+ if (code == 2) {
+ ushort sccode = 0;
+ (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, &sccode, 2);
+ ucc = (uint)sccode;
+ }
+ else if (code == 4) {
+ uint iccode = 0;
+ (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&iccode, 2);
+ ucc = iccode;
+ }
+ if (code == 2 || code == 4) {
+ code = pdfi_fapi_check_cmap_for_GID((gs_font *)pfont, (unsigned int)ucc, &gid);
+ if (code < 0)
+ gid = glyph - GS_MIN_CID_GLYPH;
+ }
+ }
+
+ if (code == 0 && pdffont11->cidtogidmap != NULL && pdffont11->cidtogidmap->length > (gid << 1) + 1) {
+ gid = pdffont11->cidtogidmap->data[gid << 1] << 8 | pdffont11->cidtogidmap->data[(gid << 1) + 1];
}
return (int)gid;
@@ -61,16 +82,36 @@ static int pdfi_cidtype2_CIDMap_proc(gs_font_cid2 *pfont, gs_glyph glyph)
static uint pdfi_cidtype2_get_glyph_index(gs_font_type42 *pfont, gs_glyph glyph)
{
pdf_cidfont_type2 *pdffont11 = (pdf_cidfont_type2 *)pfont->client_data;
- uint gid = 0;
+ uint gid = glyph - GS_MIN_CID_GLYPH;
+ int code = 0;
if (glyph < GS_MIN_CID_GLYPH) {
gid = 0;
}
else {
if (glyph < GS_MIN_GLYPH_INDEX) {
- gid = glyph - GS_MIN_CID_GLYPH;
- if (pdffont11->cidtogidmap != NULL && pdffont11->cidtogidmap->length > (gid << 1) + 1) {
- gid = pdffont11->cidtogidmap->data[gid << 1] << 8 | pdffont11->cidtogidmap->data[(gid << 1) + 1];
+ if (pdffont11->substitute == true) {
+ unsigned int ucc = 0;
+ code = pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, NULL, 0);
+ if (code == 2) {
+ ushort sccode = 0;
+ (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, &sccode, 2);
+ ucc = (uint)sccode;
+ }
+ else if (code == 4) {
+ uint iccode = 0;
+ (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&iccode, 2);
+ ucc = iccode;
+ }
+ if (code == 2 || code == 4) {
+ code = pdfi_fapi_check_cmap_for_GID((gs_font *)pfont, (unsigned int)ucc, &gid);
+ if (code < 0)
+ gid = glyph - GS_MIN_CID_GLYPH;
+ }
+ }
+
+ if (code == 0 && pdffont11->cidtogidmap != NULL && pdffont11->cidtogidmap->length > (gid << 1) + 1) {
+ gid = pdffont11->cidtogidmap->data[gid << 1] << 8 | pdffont11->cidtogidmap->data[(gid << 1) + 1];
}
}
}
@@ -254,7 +295,7 @@ pdfi_alloc_cidtype2_font(pdf_context *ctx, pdf_cidfont_type2 **font, bool is_cid
pfont->procs.encode_char = pdfi_encode_char;
pfont->data.string_proc = pdfi_cidtype2_string_proc;
pfont->procs.glyph_name = ctx->get_glyph_name;
- pfont->procs.decode_glyph = pdfi_decode_glyph;
+ pfont->procs.decode_glyph = pdfi_cidfont_decode_glyph;
pfont->procs.define_font = gs_no_define_font;
pfont->procs.make_font = gs_no_make_font;
pfont->procs.font_info = gs_default_font_info;
diff --git a/pdf/pdf_font1C.c b/pdf/pdf_font1C.c
index 7c2466266..81d192a52 100644
--- a/pdf/pdf_font1C.c
+++ b/pdf/pdf_font1C.c
@@ -2051,7 +2051,7 @@ pdfi_alloc_cff_cidfont(pdf_context *ctx, pdf_cidfont_type0 ** font, uint32_t obj
we won't worry about working without FAPI */
pfont->procs.encode_char = pdfi_encode_char;
pfont->procs.glyph_name = ctx->get_glyph_name;
- pfont->procs.decode_glyph = pdfi_decode_glyph;
+ pfont->procs.decode_glyph = pdfi_cidfont_decode_glyph;
pfont->procs.define_font = gs_no_define_font;
pfont->procs.make_font = gs_no_make_font;
pfont->procs.font_info = gs_default_font_info;
diff --git a/pdf/pdf_font_types.h b/pdf/pdf_font_types.h
index f29088f59..97a952f1e 100644
--- a/pdf/pdf_font_types.h
+++ b/pdf/pdf_font_types.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2019-2022 Artifex Software, Inc.
+/* Copyright (C) 2019-2023 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
@@ -86,6 +86,7 @@ typedef enum pdf_font_type_e {
pdf_dict *FontDescriptor; /* For PDF up to 1.4 this may be absent for the base 14 */ \
int64_t descflags; \
pdf_obj *ToUnicode; /* Name or stream (technically should be a stream, but we've seen Identity names */ \
+ bool substitute; /* We need to know what a CIDFont is a substitute */ \
pdf_string *filename /* If we read this from disk, this is the file it came from */
#define pdf_font_common \
@@ -113,7 +114,8 @@ typedef enum pdf_font_type_e {
pdf_string *ordering; \
int supplement; \
pdf_buffer *cidtogidmap; \
- bool substitute; /* We need to know what a CIDFont is a substitute */ \
+ pdfi_cid_decoding_t *decoding; /* Used when substituting a non-Identity CIDFont */ \
+ pdfi_cid_subst_nwp_table_t *substnwp; /* Also used for CIDFont substitions */ \
font_proc_glyph_info((*orig_glyph_info))
typedef struct pdf_font_s {