diff options
author | Chris Liddell <chris.liddell@artifex.com> | 2023-03-01 11:05:39 +0000 |
---|---|---|
committer | Chris Liddell <chris.liddell@artifex.com> | 2023-03-01 15:15:18 +0000 |
commit | 4b95c5da04ae4e2c8a83aec1774a067da4553f47 (patch) | |
tree | e844bb342f450276c4b001e9387559b3d41a5153 | |
parent | 3853fc8f7323f609b68907c214443e983eb514e7 (diff) | |
download | ghostpdl-4b95c5da04ae4e2c8a83aec1774a067da4553f47.tar.gz |
pdfi/pdfwrite/txtwrite: Revisit endian confusion for Unicode values
In my previous revision for this, I missed that the values from a CMap
(ToUnicode) are big-endian, but the values from the built-in decoding are
native-endian.
This should resolve that.
-rw-r--r-- | pdf/pdf_fapi.c | 13 | ||||
-rw-r--r-- | pdf/pdf_font.c | 34 | ||||
-rw-r--r-- | pdf/pdf_font0.c | 12 | ||||
-rw-r--r-- | pdf/pdf_font11.c | 26 |
4 files changed, 43 insertions, 42 deletions
diff --git a/pdf/pdf_fapi.c b/pdf/pdf_fapi.c index 2939fd039..1d3fc8c94 100644 --- a/pdf/pdf_fapi.c +++ b/pdf/pdf_fapi.c @@ -857,7 +857,7 @@ pdfi_fapi_get_glyphname_or_cid(gs_text_enum_t *penum, gs_font_base * pbfont, gs_ else { /* If the composite font has a decoding, then this is a subsituted CIDFont with a "known" ordering */ unsigned int gc = 0, cc = (unsigned int)ccode; byte uc[4]; - int l; + int l, i; if (penum->text.operation & TEXT_FROM_SINGLE_CHAR) { cc = penum->text.data.d_char; @@ -866,7 +866,6 @@ pdfi_fapi_get_glyphname_or_cid(gs_text_enum_t *penum, gs_font_base * pbfont, gs_ } else { byte *c = (byte *)&penum->text.data.bytes[penum->index - penum->bytes_decoded]; - int i; cc = 0; for (i = 0; i < penum->bytes_decoded ; i++) { cc |= c[i] << ((penum->bytes_decoded - 1) - i) * 8; @@ -874,11 +873,11 @@ pdfi_fapi_get_glyphname_or_cid(gs_text_enum_t *penum, gs_font_base * pbfont, gs_ } l = penum->orig_font->procs.decode_glyph((gs_font *)penum->orig_font, ccode, (gs_char)cc, (ushort *)uc, 4); - if (l == 2) { - cc = uc[0] | uc[1] << 8; - } - else if (l == 4) { - cc = uc[0] | uc[1] << 8 | uc[2] << 16 | uc[3] << 24; + if (l > 0 && l < sizeof(uc)) { + cc = 0; + for (i = 0; i < l; i++) { + cc |= uc[l - 1 - i] << (i * 8); + } } else cc = ccode; diff --git a/pdf/pdf_font.c b/pdf/pdf_font.c index 7f760bfa2..5862320bf 100644 --- a/pdf/pdf_font.c +++ b/pdf/pdf_font.c @@ -1754,33 +1754,33 @@ int pdfi_tounicode_char_to_unicode(pdf_context *ctx, pdf_cmap *tounicode, gs_gly if (counter.entry.value.size == 1) { l = 2; if (ucode != NULL && length >= l) { - ucode[1] = counter.entry.value.data[0]; - ucode[0] = counter.entry.value.data[1]; + ucode[0] = counter.entry.value.data[0]; + ucode[1] = counter.entry.value.data[1]; } } else if (counter.entry.value.size == 2) { l = 2; if (ucode != NULL && length >= l) { - ucode[1] = counter.entry.value.data[0]; - ucode[0] = counter.entry.value.data[1]; + ucode[0] = counter.entry.value.data[0]; + ucode[1] = counter.entry.value.data[1]; } } 
else if (counter.entry.value.size == 3) { l = 4; if (ucode != NULL && length >= l) { - ucode[3] = counter.entry.value.data[0]; - ucode[2] = counter.entry.value.data[1]; - ucode[1] = counter.entry.value.data[2]; - ucode[0] = 0; + ucode[0] = counter.entry.value.data[0]; + ucode[1] = counter.entry.value.data[1]; + ucode[2] = counter.entry.value.data[2]; + ucode[3] = 0; } } else { l = 4; if (ucode != NULL && length >= l) { - ucode[3] = counter.entry.value.data[0]; - ucode[2] = counter.entry.value.data[1]; + ucode[0] = counter.entry.value.data[0]; ucode[1] = counter.entry.value.data[1]; - ucode[0] = counter.entry.value.data[3]; + ucode[2] = counter.entry.value.data[1]; + ucode[3] = counter.entry.value.data[3]; } } } @@ -1840,17 +1840,17 @@ pdfi_cidfont_decode_glyph(gs_font *font, gs_glyph glyph, int ch, ushort *u, unsi if (cc > 65535) { code = 4; if (unicode_return != NULL && length >= code) { - unicode_return[3] = (cc & 0xFF000000)>> 24; - unicode_return[2] = (cc & 0x00FF0000) >> 16; - unicode_return[1] = (cc & 0x0000FF00) >> 8; - unicode_return[0] = (cc & 0x000000FF); + unicode_return[0] = (cc & 0xFF000000)>> 24; + unicode_return[1] = (cc & 0x00FF0000) >> 16; + unicode_return[2] = (cc & 0x0000FF00) >> 8; + unicode_return[3] = (cc & 0x000000FF); } } else { code = 2; if (unicode_return != NULL && length >= code) { - unicode_return[1] = (cc & 0x0000FF00) >> 8; - unicode_return[0] = (cc & 0x000000FF); + unicode_return[0] = (cc & 0x0000FF00) >> 8; + unicode_return[1] = (cc & 0x000000FF); } } } diff --git a/pdf/pdf_font0.c b/pdf/pdf_font0.c index 594a79a40..8d66437ea 100644 --- a/pdf/pdf_font0.c +++ b/pdf/pdf_font0.c @@ -105,17 +105,17 @@ pdfi_font0_map_glyph_to_unicode(gs_font *font, gs_glyph glyph, int ch, ushort *u if (cc > 65535) { code = 4; if (unicode_return != NULL && length >= code) { - unicode_return[3] = (cc & 0xFF000000)>> 24; - unicode_return[2] = (cc & 0x00FF0000) >> 16; - unicode_return[1] = (cc & 0x0000FF00) >> 8; - unicode_return[0] = (cc & 0x000000FF); + 
unicode_return[0] = (cc & 0xFF000000)>> 24; + unicode_return[1] = (cc & 0x00FF0000) >> 16; + unicode_return[2] = (cc & 0x0000FF00) >> 8; + unicode_return[3] = (cc & 0x000000FF); } } else { code = 2; if (unicode_return != NULL && length >= code) { - unicode_return[1] = (cc & 0x0000FF00) >> 8; - unicode_return[0] = (cc & 0x000000FF); + unicode_return[0] = (cc & 0x0000FF00) >> 8; + unicode_return[1] = (cc & 0x000000FF); } } } diff --git a/pdf/pdf_font11.c b/pdf/pdf_font11.c index 7a05d0855..8535d4a20 100644 --- a/pdf/pdf_font11.c +++ b/pdf/pdf_font11.c @@ -56,18 +56,19 @@ static int pdfi_cidtype2_CIDMap_proc(gs_font_cid2 *pfont, gs_glyph glyph) unsigned int ucc = 0; int code = pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, NULL, 0); if (code == 2) { - ushort sccode = 0; - (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, &sccode, 2); - ucc = (uint)sccode; + uchar sccode[2] = {0}; + (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&sccode, 2); + ucc = (sccode[0] << 8) + sccode[1]; } else if (code == 4) { - uint iccode = 0; + uchar iccode[4] = {0}; (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&iccode, 2); - ucc = iccode; + ucc = (iccode[0] << 24) + (iccode[1] << 16) + (iccode[2] << 8) + iccode[3]; + } if (code == 2 || code == 4) { code = pdfi_fapi_check_cmap_for_GID((gs_font *)pfont, (unsigned int)ucc, &gid); - if (code < 0) + if (code < 0 || gid == 0) gid = glyph - GS_MIN_CID_GLYPH; } } @@ -94,18 +95,19 @@ static uint pdfi_cidtype2_get_glyph_index(gs_font_type42 *pfont, gs_glyph glyph) unsigned int ucc = 0; code = pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, NULL, 0); if (code == 2) { - ushort sccode = 0; - (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, &sccode, 2); - ucc = (uint)sccode; + uchar sccode[2] = {0}; + (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&sccode, 2); + ucc = (sccode[0] << 8) + sccode[1]; } else if (code == 4) { - uint iccode = 0; + 
uchar iccode[4] = {0}; (void)pfont->procs.decode_glyph((gs_font *)pfont, glyph, -1, (ushort *)&iccode, 2); - ucc = iccode; + ucc = (iccode[0] << 24) + (iccode[1] << 16) + (iccode[2] << 8) + iccode[3]; + } if (code == 2 || code == 4) { code = pdfi_fapi_check_cmap_for_GID((gs_font *)pfont, (unsigned int)ucc, &gid); - if (code < 0) + if (code < 0 || gid == 0) gid = glyph - GS_MIN_CID_GLYPH; } } |