summaryrefslogtreecommitdiff
path: root/gs/base/gsfcmap1.c
diff options
context:
space:
mode:
Diffstat (limited to 'gs/base/gsfcmap1.c')
-rw-r--r--gs/base/gsfcmap1.c593
1 files changed, 593 insertions, 0 deletions
diff --git a/gs/base/gsfcmap1.c b/gs/base/gsfcmap1.c
new file mode 100644
index 000000000..35652aa90
--- /dev/null
+++ b/gs/base/gsfcmap1.c
@@ -0,0 +1,593 @@
+/* Copyright (C) 2001-2006 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com/
+ or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
+ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* $Id$ */
+/* Adobe-based CMap character decoding */
+#include "memory_.h"
+#include "string_.h"
+#include "gx.h"
+#include "gserrors.h"
+#include "gsstruct.h"
+#include "gsutil.h" /* for gs_next_ids */
+#include "gxfcmap1.h"
+
+/*
+ * Assemble the first n bytes at p into an unsigned integer, taking p[0]
+ * as the most significant byte (big-endian order).  The result is 0 when
+ * n is not positive.
+ */
+static inline ulong
+bytes2int(const byte *p, int n)
+{
+    ulong acc = 0;
+    int remaining = n;
+
+    /* Shift in one byte per iteration, most significant byte first. */
+    while (remaining-- > 0)
+        acc = (acc << 8) + *p++;
+    return acc;
+}
+
+/* ---------------- GC descriptors ---------------- */
+
+/* Instantiate the GC structure descriptor for Adobe1 CMaps.  The macro is
+ * not defined in this file (presumably it comes from gxfcmap1.h and
+ * defines the st_cmap_adobe1 descriptor used by gs_cmap_adobe1_alloc
+ * -- confirm). */
+public_st_cmap_adobe1();
+/* Because lookup ranges can be elements of arrays, */
+/* their enum_ptrs procedure must never return 0 prematurely. */
+/*
+ * GC pointer enumeration for a gx_cmap_lookup_range_t.
+ * Index 0 reports the owning cmap, index 1 the keys string, index 2 the
+ * values string; any other index returns 0.
+ */
+static
+ENUM_PTRS_WITH(cmap_lookup_range_enum_ptrs,
+ gx_cmap_lookup_range_t *pclr) return 0;
+case 0:
+ /* For glyph-valued ranges, report every stored glyph through the
+ * cmap's mark_glyph callback before returning the cmap pointer.
+ * 'mem' is not declared here; presumably it is supplied by the
+ * ENUM_PTRS_WITH macro -- confirm.
+ * NOTE(review): gs_cmap_adobe1_alloc does not set mark_glyph, so
+ * the creator of a glyph-valued CMap must install it -- confirm. */
+ if (pclr->value_type == CODE_VALUE_GLYPH) {
+ const byte *pv = pclr->values.data;
+ int size = pclr->value_size;
+ int k;
+
+ /* values holds num_entries big-endian glyph numbers of 'size' bytes */
+ for (k = 0; k < pclr->num_entries; ++k, pv += size) {
+ gs_glyph glyph = bytes2int(pv, size);
+
+ pclr->cmap->mark_glyph(mem, glyph, pclr->cmap->mark_glyph_data);
+ }
+ }
+ return ENUM_OBJ(pclr->cmap);
+case 1: return ENUM_STRING(&pclr->keys);
+case 2: return ENUM_STRING(&pclr->values);
+ENUM_PTRS_END
+/*
+ * GC pointer relocation for a gx_cmap_lookup_range_t: relocates the same
+ * three members that cmap_lookup_range_enum_ptrs enumerates (the owning
+ * cmap and the keys and values strings).
+ */
+static
+RELOC_PTRS_WITH(cmap_lookup_range_reloc_ptrs, gx_cmap_lookup_range_t *pclr)
+ RELOC_VAR(pclr->cmap);
+ RELOC_STRING_VAR(pclr->keys);
+ RELOC_STRING_VAR(pclr->values);
+RELOC_PTRS_END
+/* Instantiate the GC descriptors for a lookup range and for arrays of
+ * lookup ranges.  The macros are defined outside this file; presumably
+ * they bind the enum_ptrs/reloc_ptrs procedures above and define
+ * st_cmap_lookup_range, which gs_cmap_adobe1_alloc uses -- confirm. */
+public_st_cmap_lookup_range();
+public_st_cmap_lookup_range_element();
+
+/* ---------------- Procedures ---------------- */
+
+ /* ------ Decoding ------ */
+
+/*
+ * Debug helper for the multi-dimensional range checks below.
+ *
+ * Prints "<str> in <key_lo> - <key_hi>" followed by a newline, dumping
+ * key_size bytes of each of the three byte strings in hex.  It performs
+ * no comparison itself; callers use it to trace which range a code is
+ * being tested against.
+ */
+
+static void
+print_msg_str_in_range(const byte *str,
+ const byte *key_lo, const byte *key_hi,
+ int key_size)
+{
+ debug_print_string_hex(str, key_size);
+ dlprintf(" in ");
+ debug_print_string_hex(key_lo, key_size);
+ dlprintf(" - ");
+ debug_print_string_hex(key_hi, key_size);
+ dlprintf("\n");
+}
+
+/*
+ * Find the shortest character length (prefix bytes + key bytes) among the
+ * lookup ranges of pcmap.
+ *
+ * Returns that length, or MAX_CMAP_CODE_SIZE if the map has no lookup
+ * range (or none that short).  *pfidx receives the font index of the
+ * selected range, or 0 if none was selected.  Ranges are visited from the
+ * last to the first and ties replace the current choice, so among equally
+ * short ranges the one with the lowest array index wins.
+ */
+static int
+gs_cmap_get_shortest_chr(const gx_code_map_t * pcmap, uint *pfidx)
+{
+    int best_len = MAX_CMAP_CODE_SIZE;
+    uint best_fidx = 0;         /* font index for this fallback */
+    int idx = pcmap->num_lookup;
+
+    while (--idx >= 0) {
+        const gx_cmap_lookup_range_t *range = pcmap->lookup + idx;
+
+        if ((range->key_prefix_size + range->key_size) > best_len)
+            continue;           /* longer than what we already have */
+        best_len = (range->key_prefix_size + range->key_size);
+        best_fidx = range->font_index;
+    }
+
+    *pfidx = best_fidx;
+    return best_len;
+}
+
+/*
+ * Multi-dimensional relative position calculator.
+ *
+ * Treats each of the key_size bytes as one dimension of an array whose
+ * extent in dimension i is key_hi[i] - key_lo[i] + 1, and returns the
+ * row-major position of key_str within it (the last byte varies fastest).
+ * The caller is expected to have checked that key_str lies inside the
+ * range; no check is made here.
+ */
+static int
+gs_multidim_CID_offset(const byte *key_str,
+                       const byte *key_lo, const byte *key_hi,
+                       int key_size)
+{
+    int offset = 0;
+    int dim;                    /* current dimension (byte position) */
+
+    if (gs_debug_c('J')) {
+        dlprintf("[J]gmCo() calc CID_offset for 0x");
+        print_msg_str_in_range(key_str, key_lo, key_hi, key_size);
+    }
+
+    for (dim = 0; dim < key_size; ++dim) {
+        const int extent = key_hi[dim] - key_lo[dim] + 1;
+        const int delta = key_str[dim] - key_lo[dim];
+
+        /* Horner step: scale what we have, then add this dimension. */
+        offset = offset * extent + delta;
+    }
+
+    if_debug1('J', "[J]gmCo() CID_offset = %d\n", offset);
+    return offset;
+}
+
+/*
+ * Decode a character from a string using a code map, updating the index.
+ *
+ * pcmap   code map (a set of lookup ranges) to search
+ * pstr    string being decoded
+ * pindex  in: offset of the next undecoded byte; out: offset after the
+ *         consumed bytes
+ * pfidx   out: font index of the range that matched
+ * pchr    out: the consumed code bytes as a big-endian integer
+ * pglyph  out: the resulting glyph / CID (or character code, see below)
+ *
+ * On a full match, returns 0 for CODE_VALUE_CID, CODE_VALUE_NOTDEF and
+ * CODE_VALUE_GLYPH ranges; for CODE_VALUE_CHARS ranges it returns the
+ * range's value_size and stores the mapped code in *pglyph.  An unknown
+ * value type yields gs_error_rangecheck.
+ *
+ * If no range matches fully, sets *pglyph = gs_no_glyph, returns 0, and
+ * sets *pindex / *pfidx / *pchr from the recorded partial match (or
+ * restores the values they had on entry if there was none).
+ */
+static int
+code_map_decode_next_multidim_regime(const gx_code_map_t * pcmap,
+ const gs_const_string * pstr,
+ uint * pindex, uint * pfidx,
+ gs_char * pchr, gs_glyph * pglyph)
+{
+ const byte *str = pstr->data + *pindex;
+ uint ssize = pstr->size - *pindex;
+ /*
+ * The keys are not sorted due to 'usecmap'. Possible optimization :
+ * merge and sort keys in 'zbuildcmap', then use binary search here.
+ * This would be valuable for UniJIS-UTF8-H, which contains about 7000
+ * keys.
+ */
+ int i;
+
+ /*
+ * In the fallback of the CMap decoding procedure, there is "partial
+ * matching". For details, refer to the PostScript Ref. Manual v3 at the
+ * end of the Fonts chapter.
+ */
+
+ /* "pm" stands for partial match (not pointer), temporary use. */
+ /* The pm_* values start out as the caller's values, so that a call */
+ /* with no match at all leaves *pindex, *pfidx and *pchr as on entry. */
+ int pm_maxlen = 0; /* partial match: max length */
+ int pm_index = *pindex; /* partial match: ptr index (in str) */
+ uint pm_fidx = *pfidx; /* partial match: ptr font index */
+ gs_char pm_chr = *pchr; /* partial match: ptr character */
+
+ *pchr = '\0';
+
+ if (gs_debug_c('J')) {
+ dlprintf("[J]CMDNmr() is called: str=(");
+ debug_print_string_hex(str, ssize);
+ dlprintf3(") @ 0x%lx ssize=%d, %d ranges to check\n",
+ (ulong)str, ssize, pcmap->num_lookup);
+ }
+
+ for (i = pcmap->num_lookup - 1; i >= 0; --i) {
+ /* main loop - scan the map passed via pcmap */
+ /* reverse scan order due to 'usecmap' */
+
+ const gx_cmap_lookup_range_t *pclr = &pcmap->lookup[i];
+ int pre_size = pclr->key_prefix_size, key_size = pclr->key_size,
+ chr_size = pre_size + key_size;
+
+ int j = 0;
+ /* length of the given byte stream is shorter than
+ * chr-length of current range, no need for further check,
+ * skip to the next range.
+ */
+ if (ssize < chr_size)
+ continue;
+
+ if (0 < pre_size) {
+ const byte * prefix = pclr->key_prefix;
+ /* check partial match in prefix */
+ /* after the loop, j is the number of leading prefix bytes that match */
+ for (j = 0; j < pre_size; j++)
+ if (prefix[j] != str[j])
+ break;
+
+ if (0 == j) /* no match, skip to next i */
+ continue;
+ else if (j < pre_size) { /* not exact, partial match */
+ if (gs_debug_c('J')) {
+ dlprintf("[J]CMDNmr() partial match with prefix:");
+ print_msg_str_in_range(str, prefix,
+ prefix, pre_size);
+ }
+
+ /* NOTE(review): the test compares pm_maxlen with the number of
+ * matched bytes (j), but the value stored is the full chr_size
+ * of this range -- confirm this asymmetry is intended. */
+ if (pm_maxlen < j) {
+ pm_maxlen = chr_size;
+ pm_chr = bytes2int(str, chr_size);
+ pm_index = (*pindex) + chr_size;
+ pm_fidx = pclr->font_index;
+ }
+ continue ; /* no need to check key, skip to next i */
+ }
+
+ if (gs_debug_c('J')) {
+ dlprintf("[J]CMDNmr() full match with prefix:");
+ print_msg_str_in_range(str, prefix, prefix, pre_size);
+ }
+
+ } /* if (0 < pre_size) */
+
+ /* full match in prefix. check key */
+ {
+ const byte *key = pclr->keys.data;
+ int step = key_size;
+ int k, l;
+ const byte *pvalue = NULL;
+
+ /* when range is "range", 2 keys for lo-end and hi-end
+ * are stacked. So twice the step. current "key" points to the
+ * lo-end of current range, and the pointer for hi-end
+ * is calculated by (key + step - key_size).
+ * For a single-key lookup, step == key_size, so the hi-end
+ * pointer equals key and the entry acts as a range with lo == hi.
+ */
+
+ if (pclr->key_is_range)
+ step <<=1; /* step = step * 2; */
+
+ for (k = 0; k < pclr->num_entries; ++k, key += step) {
+
+ if_debug0('j', "[j]CMDNmr() check key:");
+ if (gs_debug_c('j'))
+ print_msg_str_in_range(str + pre_size,
+ key, key + step - key_size, key_size) ;
+
+ /* each key byte must lie within [lo, hi] for its own position; */
+ /* l ends up as the number of leading key bytes that are in range */
+ for (l = 0; l < key_size; l++) {
+ byte c = str[l + pre_size];
+ if (c < key[l] || c > key[step - key_size + l])
+ break;
+ }
+
+ /* record the best partial match seen so far (this also runs on */
+ /* a full match, where the pm_* values end up unused) */
+ if (pm_maxlen < pre_size + l) {
+ pm_maxlen = chr_size;
+ pm_chr = bytes2int(str, chr_size);
+ pm_index = (*pindex) + chr_size;
+ pm_fidx = pclr->font_index;
+ }
+ if (l == key_size)
+ break;
+ }
+
+ /* all keys are tried, but found no match. */
+ /* go to next prefix. */
+ if (k == pclr->num_entries)
+ continue;
+
+ /* We have a match. Return the result. */
+ *pchr = bytes2int(str, chr_size);
+ *pindex += chr_size;
+ *pfidx = pclr->font_index;
+ /* values holds one value_size-byte value per entry, indexed like keys */
+ pvalue = pclr->values.data + k * pclr->value_size;
+
+ if (gs_debug_c('J')) {
+ dlprintf("[J]CMDNmr() full matched pvalue=(");
+ debug_print_string_hex(pvalue, pclr->value_size);
+ dlprintf(")\n");
+ }
+
+ switch (pclr->value_type) {
+ case CODE_VALUE_CID:
+ /* base CID of the range plus the position of the code inside it */
+ *pglyph = gs_min_cid_glyph +
+ bytes2int(pvalue, pclr->value_size) +
+ gs_multidim_CID_offset(str + pre_size,
+ key, key + step - key_size, key_size);
+ return 0;
+ case CODE_VALUE_NOTDEF:
+ /* every code in a notdef range maps to the same CID */
+ *pglyph = gs_min_cid_glyph +
+ bytes2int(pvalue, pclr->value_size);
+ return 0;
+ case CODE_VALUE_GLYPH:
+ *pglyph = bytes2int(pvalue, pclr->value_size);
+ return 0;
+ case CODE_VALUE_CHARS:
+ /* base character code plus the linear distance of the input */
+ /* code from the lo key; the result is stored in *pglyph */
+ *pglyph =
+ bytes2int(pvalue, pclr->value_size) +
+ bytes2int(str + pre_size, key_size) -
+ bytes2int(key, key_size);
+ return pclr->value_size;
+ default: /* shouldn't happen */
+ return_error(gs_error_rangecheck);
+ }
+ }
+ }
+ /* No mapping. */
+ /* Hand back the partial-match state (or the entry values if none). */
+ *pchr = pm_chr;
+ *pindex = pm_index;
+ *pfidx = pm_fidx;
+ *pglyph = gs_no_glyph;
+ if (gs_debug_c('J')) {
+ dlprintf("[J]CMDNmr() no full match, use partial match for (");
+ debug_print_string_hex(str, pm_maxlen);
+ dlprintf(")\n");
+ }
+ return 0;
+}
+
+/*
+ * Decode a character from a string using a CMap.
+ * Return like code_map_decode_next_multidim_regime.
+ *
+ * The 'def' map is tried first, then the 'notdef' map.  If neither has a
+ * full match, the result is CID 0 (*pglyph = gs_min_cid_glyph) and the
+ * index is advanced either past the bytes of the best partial match in
+ * the def map or, with no partial match, by the length of the shortest
+ * character defined in the def map.  If fewer bytes than that remain,
+ * *pglyph is set to gs_no_glyph and *pindex to the end of the string.
+ * All of these fallback cases return 0.
+ *
+ * At present, the range specification by (begin|end)codespacerange
+ * is not used in this function. Therefore, this function accepts
+ * some invalid CMaps whose def & undef maps exceed the codespacerange.
+ * It should be checked in this function, or some procedure in gs_cmap.ps.
+ */
+static int
+gs_cmap_adobe1_decode_next(const gs_cmap_t * pcmap_in,
+                           const gs_const_string * pstr,
+                           uint * pindex, uint * pfidx,
+                           gs_char * pchr, gs_glyph * pglyph)
+{
+    const gs_cmap_adobe1_t *pcmap = (const gs_cmap_adobe1_t *)pcmap_in;
+    uint save_index = *pindex;
+    int code;
+
+    uint pm_index;
+    uint pm_fidx;
+    gs_char pm_chr;
+
+    /* For first, check defined map */
+    if_debug0('J', "[J]GCDN() check def CMap\n");
+    code =
+        code_map_decode_next_multidim_regime(&pcmap->def, pstr, pindex, pfidx, pchr, pglyph);
+
+    /* This is defined character */
+    if (code != 0 || *pglyph != gs_no_glyph)
+        return code;
+
+    /* In here, this is NOT defined character */
+    /* save partially matched results */
+    pm_index = *pindex;
+    pm_fidx = *pfidx;
+    pm_chr = *pchr;
+
+    /* check notdef map. */
+    if_debug0('J', "[J]GCDN() check notdef CMap\n");
+    *pindex = save_index;
+    code =
+        code_map_decode_next_multidim_regime(&pcmap->notdef, pstr, pindex, pfidx, pchr, pglyph);
+
+    /* This is defined "notdef" character. */
+    if (code != 0 || *pglyph != gs_no_glyph)
+        return code;
+
+    /*
+     * This is undefined in def & undef maps,
+     * use partially matched result with default notdef (CID = 0).
+     */
+    if (save_index < pm_index) {
+
+        /* there was some partially matched */
+
+        *pglyph = gs_min_cid_glyph;     /* CID = 0 */
+        *pindex = pm_index;
+        *pfidx = pm_fidx;
+        *pchr = '\0';
+        return 0; /* should return some error for partial matched .notdef? */
+    }
+    else {
+        /* no match */
+
+        /* Even partial match is failed.
+         * Getting the shortest length from defined characters,
+         * and take the leading bytes (with same length of the shortest
+         * defined chr) as an unidentified character: CID = 0.
+         * Also this procedure is specified in PS Ref. Manual v3,
+         * at the end of Fonts chapter.
+         */
+
+        const byte *str = pstr->data + save_index;
+        uint ssize = pstr->size - save_index;
+        int chr_size_shortest =
+            gs_cmap_get_shortest_chr(&pcmap->def, pfidx);
+
+        if (chr_size_shortest <= ssize) {
+            *pglyph = gs_min_cid_glyph; /* CID = 0, this is CMap fallback */
+            *pindex = save_index + chr_size_shortest;
+            *pchr = '\0';
+            if (gs_debug_c('J')) {
+                dlprintf1("[J]GCDN() no partial match, skip %d byte (",
+                          chr_size_shortest);
+                debug_print_string_hex(str, chr_size_shortest);
+                dlprintf(")\n");
+            }
+            return 0; /* should return some error for fallback .notdef? */
+        }
+        else {
+            /* Undecodable string is shorter than the shortest character,
+             * return 'gs_no_glyph' and update index to end-of-string
+             */
+            if (gs_debug_c('J')) {
+                dlprintf2("[J]GCDN() left data in buffer (%d) is shorter than shortest defined character (%d)\n",
+                          ssize, chr_size_shortest);
+            }
+            *pglyph = gs_no_glyph;
+            /*
+             * Compute the end of the string from save_index rather than
+             * from *pindex: the notdef lookup above may have left *pindex
+             * advanced past a partial match, and adding ssize to that
+             * would move the index beyond the end of the string.
+             */
+            *pindex = save_index + ssize;
+            return 0; /* fixme: should return a code != 0 if caller needs to know */
+        }
+    }
+}
+
+ /* ------ Initialization/creation ------ */
+
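+/*
+ * Enumeration procedures for Adobe1 CMaps, followed by
+ * gs_cmap_adobe1_alloc, which allocates and initializes an Adobe1 CMap.
+ * The caller of gs_cmap_adobe1_alloc must still fill in the code space
+ * ranges, lookup tables, keys, and values.
+ */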
+
+/*
+ * Range enumerator step: copy the next code space range of the CMap into
+ * penum->range and advance penum->index.
+ * Returns 0 if a range was delivered, 1 when the ranges are exhausted.
+ */
+static int
+adobe1_next_range(gs_cmap_ranges_enum_t *penum)
+{
+    const gs_cmap_adobe1_t *const cmap1 =
+        (const gs_cmap_adobe1_t *)penum->cmap;
+
+    if (penum->index < cmap1->code_space.num_ranges) {
+        penum->range = cmap1->code_space.ranges[penum->index];
+        penum->index++;
+        return 0;
+    }
+    return 1;                   /* no more ranges */
+}
+/* Procedure table for enumerating the code space ranges of an Adobe1 */
+/* CMap; its single member is the "next range" step. */
+static const gs_cmap_ranges_enum_procs_t adobe1_range_procs = {
+ adobe1_next_range
+};
+/* enum_ranges procedure of the Adobe1 CMap: set up 'pre' to enumerate */
+/* the code space ranges of pcmap using adobe1_range_procs. */
+static void
+gs_cmap_adobe1_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre)
+{
+ gs_cmap_ranges_enum_setup(pre, pcmap, &adobe1_range_procs);
+}
+/*
+ * Lookup enumerator step shared by the def and notdef maps.
+ *
+ * Loads the per-lookup fields of the next lookup range of pcm into
+ * penum->entry (total key size including the prefix, key_is_range, value
+ * type, value size, font index), advances penum->index[0] and resets the
+ * entry counter penum->index[1].
+ * Returns 0 if a lookup was delivered, 1 when there are no more lookups.
+ */
+static int
+adobe1_next_lookup(gs_cmap_lookups_enum_t *penum, const gx_code_map_t *pcm)
+{
+    const gx_cmap_lookup_range_t *lookup;
+
+    if (penum->index[0] >= pcm->num_lookup)
+        return 1;
+    /*
+     * Take the element address only after the bounds check: an empty map
+     * may have a null lookup array (gs_cmap_adobe1_alloc leaves
+     * notdef.lookup == 0), and indexing a null pointer is undefined.
+     */
+    lookup = &pcm->lookup[penum->index[0]];
+    penum->entry.key_size = lookup->key_prefix_size + lookup->key_size;
+    penum->entry.key_is_range = lookup->key_is_range;
+    penum->entry.value_type = lookup->value_type;
+    penum->entry.value.size = lookup->value_size;
+    penum->entry.font_index = lookup->font_index;
+    penum->index[0]++;
+    penum->index[1] = 0;
+    return 0;
+}
+/* next_lookup step bound to the 'def' map of the enumerated CMap. */
+static int
+adobe1_next_lookup_def(gs_cmap_lookups_enum_t *penum)
+{
+    const gs_cmap_adobe1_t *const cmap1 =
+        (const gs_cmap_adobe1_t *)penum->cmap;
+
+    return adobe1_next_lookup(penum, &cmap1->def);
+}
+/* next_lookup step bound to the 'notdef' map of the enumerated CMap. */
+static int
+adobe1_next_lookup_notdef(gs_cmap_lookups_enum_t *penum)
+{
+    const gs_cmap_adobe1_t *const cmap1 =
+        (const gs_cmap_adobe1_t *)penum->cmap;
+
+    return adobe1_next_lookup(penum, &cmap1->notdef);
+}
+/*
+ * Entry enumerator step shared by the def and notdef maps.
+ *
+ * Operates on the lookup most recently delivered by adobe1_next_lookup
+ * (index[0] - 1).  Builds the full lo and hi keys (prefix + key bytes) of
+ * entry index[1] in penum->entry.key[0] and key[1], points
+ * penum->entry.value at the entry's value, and advances index[1].
+ * For a lookup whose keys are not ranges, only one key is stored per
+ * entry, so it is reported as both lo and hi.
+ *
+ * Returns 0 if an entry was delivered, 1 when the lookup has no more
+ * entries, or gs_error_rangecheck if prefix + key would not fit in
+ * MAX_CMAP_CODE_SIZE bytes.
+ */
+static int
+adobe1_next_entry(gs_cmap_lookups_enum_t *penum, const gx_code_map_t *pcm)
+{
+    const gx_cmap_lookup_range_t *lookup = &pcm->lookup[penum->index[0] - 1];
+    int psize = lookup->key_prefix_size;
+    int ksize = lookup->key_size;
+    /* bytes of key storage per entry: lo + hi for ranges, else one key */
+    int stride = ksize * (lookup->key_is_range ? 2 : 1);
+    const byte *key;
+    int i;
+
+    if (penum->index[1] >= lookup->num_entries)
+        return 1;
+    if (psize + ksize > MAX_CMAP_CODE_SIZE)
+        return_error(gs_error_rangecheck);
+    key = lookup->keys.data + penum->index[1] * stride;
+    for (i = 0; i < 2; ++i) {
+        memcpy(penum->entry.key[i], lookup->key_prefix, psize);
+        memcpy(penum->entry.key[i] + psize, key, ksize);
+        /*
+         * Step to the hi key only when one is stored.  Stepping for a
+         * single key would read the next entry's key, or run past the
+         * end of the key data on the last entry.
+         */
+        if (lookup->key_is_range)
+            key += ksize;
+    }
+    penum->entry.value.data =
+        lookup->values.data + penum->index[1] * lookup->value_size;
+    penum->entry.value.size = lookup->value_size;
+    penum->index[1]++;
+    return 0;
+}
+/* next_entry step bound to the 'def' map of the enumerated CMap. */
+static int
+adobe1_next_entry_def(gs_cmap_lookups_enum_t *penum)
+{
+    const gs_cmap_adobe1_t *const cmap1 =
+        (const gs_cmap_adobe1_t *)penum->cmap;
+
+    return adobe1_next_entry(penum, &cmap1->def);
+}
+/* next_entry step bound to the 'notdef' map of the enumerated CMap. */
+static int
+adobe1_next_entry_notdef(gs_cmap_lookups_enum_t *penum)
+{
+    const gs_cmap_adobe1_t *const cmap1 =
+        (const gs_cmap_adobe1_t *)penum->cmap;
+
+    return adobe1_next_entry(penum, &cmap1->notdef);
+}
+/* Lookup enumeration procedures (next lookup, next entry) for the */
+/* 'def' map of an Adobe1 CMap. */
+static const gs_cmap_lookups_enum_procs_t adobe1_lookup_def_procs = {
+ adobe1_next_lookup_def, adobe1_next_entry_def
+};
+/* Lookup enumeration procedures (next lookup, next entry) for the */
+/* 'notdef' map of an Adobe1 CMap. */
+static const gs_cmap_lookups_enum_procs_t adobe1_lookup_notdef_procs = {
+ adobe1_next_lookup_notdef, adobe1_next_entry_notdef
+};
+/*
+ * enum_lookups procedure of the Adobe1 CMap: set up 'pre' to enumerate
+ * the lookups of pcmap.  A zero 'which' selects the def map, any other
+ * value the notdef map.
+ */
+static void
+gs_cmap_adobe1_enum_lookups(const gs_cmap_t *pcmap, int which,
+                            gs_cmap_lookups_enum_t *pre)
+{
+    const gs_cmap_lookups_enum_procs_t *procs = &adobe1_lookup_def_procs;
+
+    if (which)
+        procs = &adobe1_lookup_notdef_procs;
+    gs_cmap_lookups_enum_setup(pre, pcmap, procs);
+}
+
+/* Procedure table installed in every Adobe1 CMap by gs_cmap_adobe1_alloc: */
+/* decoding, range enumeration and lookup enumeration are implemented in */
+/* this file; gs_cmap_compute_identity is defined elsewhere. */
+static const gs_cmap_procs_t cmap_adobe1_procs = {
+ gs_cmap_adobe1_decode_next,
+ gs_cmap_adobe1_enum_ranges,
+ gs_cmap_adobe1_enum_lookups,
+ gs_cmap_compute_identity
+};
+
+/*
+ * Allocate and initialize an Adobe1 CMap. The caller must still fill in
+ * the code space ranges, lookup tables, keys, and values.
+ *
+ * ppcmap       out: the new CMap (set only on success)
+ * wmode, map_name, name_size, pcidsi_in, num_fonts
+ *              passed through to gs_cmap_alloc
+ * num_ranges   number of code space ranges to allocate
+ * num_lookups  number of lookup ranges to allocate for the def map
+ * keys_size    total size in bytes of the key storage
+ * values_size  total size in bytes of the value storage
+ * mem          allocator for all of the above
+ *
+ * Returns 0 on success, or gs_error_VMerror if any allocation (or
+ * gs_cmap_alloc itself) fails.
+ */
+int
+gs_cmap_adobe1_alloc(gs_cmap_adobe1_t **ppcmap, int wmode,
+ const byte *map_name, uint name_size,
+ uint num_fonts, uint num_ranges, uint num_lookups,
+ uint keys_size, uint values_size,
+ const gs_cid_system_info_t *pcidsi_in, gs_memory_t *mem)
+{
+ gs_cmap_t *pcmap;
+ gs_cmap_adobe1_t *pcmap1;
+ /* All allocations are attempted up front; the results are checked */
+ /* together below.  Zero-sized lookups/keys/values are not allocated. */
+ gx_code_space_range_t *ranges = (gx_code_space_range_t *)
+ gs_alloc_byte_array(mem, num_ranges, sizeof(gx_code_space_range_t),
+ "gs_cmap_alloc(code space ranges)");
+ gx_cmap_lookup_range_t *lookups =
+ (num_lookups == 0 ? NULL :
+ gs_alloc_struct_array(mem, num_lookups, gx_cmap_lookup_range_t,
+ &st_cmap_lookup_range,
+ "gs_cmap_alloc(lookup ranges)"));
+ byte *keys =
+ (keys_size == 0 ? NULL :
+ gs_alloc_string(mem, keys_size, "gs_cmap_alloc(keys)"));
+ byte *values =
+ (values_size == 0 ? NULL :
+ gs_alloc_string(mem, values_size, "gs_cmap_alloc(values)"));
+ int code =
+ gs_cmap_alloc(&pcmap, &st_cmap_adobe1, wmode, map_name, name_size,
+ pcidsi_in, num_fonts, &cmap_adobe1_procs, mem);
+ uint i;
+
+ /* On any failure, release the four local allocations and report */
+ /* VMerror (the code returned by gs_cmap_alloc is not propagated). */
+ /* NOTE(review): if gs_cmap_alloc succeeded but another allocation */
+ /* failed, pcmap is not released here -- confirm whether that leaks. */
+ if (code < 0 || ranges == 0 || (num_lookups != 0 && lookups == 0) ||
+ (keys_size != 0 && keys == 0) || (values_size != 0 && values == 0)) {
+ gs_free_string(mem, values, values_size, "gs_cmap_alloc(values)");
+ gs_free_string(mem, keys, keys_size, "gs_cmap_alloc(keys)");
+ gs_free_object(mem, lookups, "gs_cmap_alloc(lookup ranges)");
+ gs_free_object(mem, ranges, "gs_cmap_alloc(code space ranges)");
+ return_error(gs_error_VMerror);
+ }
+ *ppcmap = pcmap1 = (gs_cmap_adobe1_t *)pcmap;
+ pcmap1->code_space.ranges = ranges;
+ pcmap1->code_space.num_ranges = num_ranges;
+ if (num_lookups > 0) {
+ /* Zero every lookup range and set its back pointer to the CMap */
+ /* (the GC enum_ptrs procedure follows this pointer). */
+ for (i = 0; i < num_lookups; ++i) {
+ memset(&lookups[i], 0, sizeof(*lookups));
+ lookups[i].cmap = pcmap1;
+ }
+ /* The first lookup initially owns the whole key and value storage; */
+ /* the other lookups are left with empty strings for the caller to */
+ /* fill in. */
+ lookups[0].keys.data = keys;
+ lookups[0].keys.size = keys_size;
+ lookups[0].values.data = values;
+ lookups[0].values.size = values_size;
+ }
+ /* All allocated lookups go to the def map; the notdef map starts empty. */
+ pcmap1->def.lookup = lookups;
+ pcmap1->def.num_lookup = num_lookups;
+ pcmap1->notdef.lookup = 0;
+ pcmap1->notdef.num_lookup = 0;
+ /* no mark_glyph, mark_glyph_data, glyph_name, glyph_name_data */
+ return 0;
+}