summaryrefslogtreecommitdiff
path: root/gpdl
diff options
context:
space:
mode:
authorRobin Watts <Robin.Watts@artifex.com>2022-10-06 11:17:39 +0100
committerRobin Watts <Robin.Watts@artifex.com>2022-10-21 15:38:14 +0100
commit253e26c536e92f78ae44b1862480b001a4933971 (patch)
treea67fa7b61e4a1b716b8d3dd64defcf00eca1bb50 /gpdl
parentcff2b8b436d1f8775830a1c3d7427a7ad5056940 (diff)
downloadghostpdl-253e26c536e92f78ae44b1862480b001a4933971.tar.gz
Add text file interpreter for gpdl.
Spots ascii/utf8/utf16 and feeds to PCL interpreter. May need further tweaking with both text file spotting and font selection, but it's a decent start.
Diffstat (limited to 'gpdl')
-rw-r--r--gpdl/gpdl.mak10
-rw-r--r--gpdl/txttop.c845
2 files changed, 855 insertions, 0 deletions
diff --git a/gpdl/gpdl.mak b/gpdl/gpdl.mak
index 28183f8e2..25d9a6ee2 100644
--- a/gpdl/gpdl.mak
+++ b/gpdl/gpdl.mak
@@ -50,6 +50,9 @@ GPDL_JP2K_TOP_OBJ=$(GPDLOBJ)$(GPDL_JP2K_TOP_OBJ_FILE)
GPDL_PNG_TOP_OBJ_FILE=pngtop.$(OBJ)
GPDL_PNG_TOP_OBJ=$(GPDLOBJ)$(GPDL_PNG_TOP_OBJ_FILE)
+GPDL_TXT_TOP_OBJ_FILE=txttop.$(OBJ)
+GPDL_TXT_TOP_OBJ=$(GPDLOBJ)$(GPDL_TXT_TOP_OBJ_FILE)
+
GPDL_PSI_TOP_OBJS=\
$(GPDL_PNG_TOP_OBJ)\
$(GPDL_JP2K_TOP_OBJ)\
@@ -60,6 +63,7 @@ GPDL_PSI_TOP_OBJS=\
$(GPDL_URF_TOP_OBJ)\
$(GPDL_PSI_TOP_OBJ)\
$(GPDL_SO_TOP_OBJ)\
+ $(GPDL_TXT_TOP_OBJ)\
$(GPDLOBJ)gpdlimpl.$(OBJ)
LANG_CFLAGS=\
@@ -75,6 +79,7 @@ LANG_CFLAGS=\
$(D_)JP2K_INCLUDED$(_D)\
$(D_)PNG_INCLUDED$(_D)\
$(ENABLE_SO)\
+ $(D_)TXT_INCLUDED$(_D)\
GPDL_CFLAGS=$(LANG_CFLAGS) $(I_)$(PSSRCDIR)$(_I) $(I_)$(PLSRCDIR)$(_I) $(I_)$(GLSRCDIR)$(_I) $(I_)$(DEVSRCDIR)$(_I) $(I_)$(GLGENDIR)$(_I) $(C_)
@@ -163,3 +168,8 @@ $(GPDL_PNG_TOP_OBJ): $(GPDLSRC)pngtop.c $(AK)\
$(gxdevice_h) $(gserrors_h) $(gsstate_h) $(strimpl_h) $(gscoord_h)\
$(png__h) $(pltop_h) $(gsicc_manage_h) $(gspaint_h) $(plmain_h)
$(GPDLCC) $(II)$(PI_)$(_I) $(PCF_) $(GPDLSRC)pngtop.c $(GPDLO_)$(GPDL_PNG_TOP_OBJ_FILE)
+
+$(GPDL_TXT_TOP_OBJ): $(GPDLSRC)txttop.c $(AK)\
+ $(gxdevice_h) $(gserrors_h) $(gsstate_h) $(strimpl_h) $(gscoord_h)\
+ $(pltop_h) $(gsicc_manage_h) $(gspaint_h) $(plmain_h)
+ $(GPDLCC) $(II)$(PI_)$(_I) $(PCF_) $(GPDLSRC)txttop.c $(GPDLO_)$(GPDL_TXT_TOP_OBJ_FILE)
diff --git a/gpdl/txttop.c b/gpdl/txttop.c
new file mode 100644
index 000000000..73e4c7ac7
--- /dev/null
+++ b/gpdl/txttop.c
@@ -0,0 +1,845 @@
+/* Copyright (C) 2022 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied,
+ modified or distributed except as expressly authorized under the terms
+ of the license contained in the file LICENSE in this distribution.
+
+ Refer to licensing information at http://www.artifex.com or contact
+ Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato,
+ CA 94945, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* Top-level API implementation for text file handling */
+
+/* Language wrapper implementation (see pltop.h) */
+
+
+/* Enable the following for a dump of the codepoints to stdout. */
+/* #define DEBUG_CODEPOINTS */
+
+/* Enable the following for a hacky dump of the output PCL to file. */
+/* #define DEBUG_DUMP_PCL */
+
+#ifdef DEBUG_DUMP_PCL
+#include <stdio.h>
+static FILE *debug_pcl_out = NULL;
+static void wipe(void)
+{
+ fclose(debug_pcl_out);
+ debug_pcl_out = NULL;
+}
+static void
+debug_as_pcl(const char *p, int n)
+{
+ if (debug_pcl_out == NULL)
+ {
+ debug_pcl_out = fopen("debug_pcl_out", "wb");
+ atexit(wipe);
+ }
+ fwrite(p, n, 1, debug_pcl_out);
+}
+#endif
+
+#include "pltop.h"
+#include "plmain.h"
+
+#include "plparse.h" /* for e_ExitLanguage */
+#include "plmain.h"
+#include "gxdevice.h" /* so we can include gxht.h below */
+#include "gserrors.h"
+#include "gp.h"
+#include "assert_.h"
+
+/*
+ * The TXT interpeter is identical to pl_interp_t.
+ * The TXT interpreter instance is derived from pl_interp_implementation_t.
+ */
+
+typedef enum
+{
+ TXT_STATE_INIT = 0,
+ TXT_STATE_UTF8,
+ TXT_STATE_UTF8_MAYBE,
+ TXT_STATE_UTF16_LE,
+ TXT_STATE_UTF16_BE,
+ TXT_STATE_ASCII
+} txt_state_t;
+
+typedef struct txt_interp_instance_s txt_interp_instance_t;
+
+struct txt_interp_instance_s
+{
+ gs_memory_t *memory; /* memory allocator to use */
+
+ pl_interp_implementation_t *sub;
+ gx_device *device;
+
+ int buffered;
+ byte buffer[4];
+
+ int state;
+ int detected;
+ int just_had_lf;
+ int just_had_cr;
+ int col;
+};
+
+enum
+{
+ TXT_UNDETECTED = -1,
+ TXT_UNKNOWN,
+ TXT_UTF8,
+ TXT_UTF8_MAYBE,
+ TXT_UTF16_LE,
+ TXT_UTF16_BE,
+ TXT_ASCII,
+};
+
+static int
+identify_from_buffer(const unsigned char *s, int len)
+{
+ int count_controls = 0;
+ int count_hi = 0;
+ int count_tabs = 0;
+ int plausibly_utf8 = 1;
+ int i;
+
+ /* UTF-8 with a BOM */
+ if (len >= 3 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf)
+ return TXT_UTF8;
+ /* UTF-16 (little endian) */
+ if (len >= 2 && s[0] == 0xff && s[1] == 0xfe)
+ return TXT_UTF16_LE;
+ /* UTF-16 (big endian) */
+ if (len >= 2 && s[0] == 0xfe && s[1] == 0xff)
+ return TXT_UTF16_BE;
+
+ /* Gather some stats. */
+ for (i = 0; i < len; i++)
+ {
+ if (s[i] == 9)
+ {
+ count_tabs++;
+ }
+ else if (s[i] == 12)
+ {
+ /* Form feed. We'll let that slide. */
+ }
+ else if (s[i] == 10)
+ {
+ if (i+1 < len && s[i+1] == 13)
+ i++;
+ }
+ else if (s[i] == 13)
+ {
+ if (i+1 < len && s[i+1] == 10)
+ i++;
+ }
+ else if (s[i] < 32 || s[i] == 0x7f)
+ {
+ count_controls++;
+ }
+ else if (s[i] < 0x7f)
+ {
+ /* Seems like a reasonable ASCII value. */
+ }
+ else
+ {
+ count_hi++;
+ if ((s[i] & 0xF8) == 0xF0)
+ {
+ /* 3 following bytes */
+ if (i+1 < len && (s[i+1] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else if (i+2 < len && (s[i+2] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else if (i+3 < len && (s[i+3] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else
+ i+=3;
+ }
+ else if ((s[i] & 0xF0) == 0xE0)
+ {
+ /* 2 following bytes */
+ if (i+1 < len && (s[i+1] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else if (i+2 < len && (s[i+2] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else
+ i+=2;
+ }
+ else if ((s[i] & 0xE0) == 0xC0)
+ {
+ /* 1 following bytes */
+ if (i+1 < len && (s[i+1] & 0xC0) != 0x80)
+ plausibly_utf8 = 0;
+ else
+ i++;
+ }
+ else
+ plausibly_utf8 = 0;
+ }
+ }
+
+ /* Any (non tab/cr/lf/ff) control characters probably means this isn't text. */
+ if (count_controls > 0)
+ return TXT_UNKNOWN;
+ /* If we've managed to decode all that as utf8 without problem, it's probably text. */
+ if (plausibly_utf8)
+ return TXT_UTF8_MAYBE;
+ /* If we're hitting too many top bit set chars, give up. */
+ if (count_hi > len/10)
+ return TXT_UNKNOWN;
+
+ return TXT_ASCII;
+}
+
+static int
+txt_detect_language(const char *t, int len)
+{
+ const unsigned char *s = (const unsigned char *)t;
+
+ switch (identify_from_buffer(s, len))
+ {
+ case TXT_UTF8:
+ case TXT_UTF16_LE:
+ case TXT_UTF16_BE:
+ /* PCL spots files with lots of ESCs in them at confidence
+ * level 80. We'll use 70, cos we don't want to override that. */
+ return 70;
+ case TXT_UTF8_MAYBE:
+ case TXT_ASCII:
+ return 60;
+ default:
+ case TXT_UNKNOWN:
+ break;
+ }
+
+ return 0;
+}
+
+static const pl_interp_characteristics_t *
+txt_impl_characteristics(const pl_interp_implementation_t *pimpl)
+{
+ static pl_interp_characteristics_t txt_characteristics =
+ {
+ "TXT",
+ txt_detect_language,
+ };
+ return &txt_characteristics;
+}
+
+/* Do per-instance interpreter allocation/init. No device is set yet */
+static int
+txt_impl_allocate_interp_instance(pl_interp_implementation_t *impl,
+ gs_memory_t *pmem)
+{
+ txt_interp_instance_t *instance;
+
+ instance = (txt_interp_instance_t *) gs_alloc_bytes(pmem,
+ sizeof(txt_interp_instance_t), "txt_impl_allocate_interp_instance");
+
+ if (!instance)
+ return_error(gs_error_VMerror);
+
+ instance->memory = pmem;
+ instance->sub = NULL;
+
+ impl->interp_client_data = instance;
+
+ return 0;
+}
+
+/* Prepare interp instance for the next "job" */
+static int
+txt_impl_init_job(pl_interp_implementation_t *impl,
+ gx_device *pdevice)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+
+ instance->device = pdevice;
+ instance->state = TXT_STATE_INIT;
+ instance->buffered = 0;
+ instance->detected = TXT_UNDETECTED;
+ instance->just_had_lf = 0;
+ instance->just_had_cr = 0;
+ instance->col = 0;
+
+ instance->sub = pl_main_get_pcl_instance(instance->memory);
+
+ return pl_init_job(instance->sub, instance->device);
+}
+
+#define ESC 27
+
+static int
+send_bytes(txt_interp_instance_t *instance, const byte *p, int n)
+{
+ stream_cursor_read cursor;
+
+#ifdef DEBUG_DUMP_PCL
+ debug_as_pcl(p, n);
+#endif
+
+ cursor.ptr = p-1;
+ cursor.limit = cursor.ptr + n;
+
+ return instance->sub->proc_process(instance->sub, &cursor);
+}
+
+static void
+drop_buffered(txt_interp_instance_t *instance, int n)
+{
+ assert(instance->buffered >= n);
+ instance->buffered -= n;
+ if (instance->buffered > 0)
+ memmove(instance->buffer, &instance->buffer[n], instance->buffered);
+}
+
+static int
+send_pcl_init(txt_interp_instance_t *instance)
+{
+ static byte init[] = {
+ ESC, 'E', // Reset
+ ESC, '&', 'l', '0', 'O', // Orientation
+ ESC, '&', 'k', '1', '0', 'H', // Horizontal spacing 10/120 of an inch.
+ ESC, '&', 'l', '8', 'C', // Vertical line spacing 8/48 of an inch.
+ ESC, '&', 't', '8', '3', 'P', // &t = double byte parsing, 83 = utf-8, P = ?
+ ESC, '(', '1', '8', 'N', // Primary symbol set = 18N = Unicode
+ ESC, '(', 's', '0', 'P', // Fixed pitch
+ ESC, '(', 's', '1', '2', 'H', // Secondary fixed pitch 12cpi
+ ESC, '(', 's', '8', 'V', // Point size 8
+ ESC, '(', 's', '3', 'T', // Typeface number 3
+ ESC, '&', 's', '0', 'C' // Wrappity wrap wrap
+ };
+
+ return send_bytes(instance, init, sizeof(init));
+}
+
+static int
+send_urc(txt_interp_instance_t *instance, int n)
+{
+ static byte unicode_replacement_char_as_utf8[] = { 0xe3, 0xbf, 0xbd };
+
+ if (instance->state == TXT_STATE_UTF8_MAYBE)
+ {
+ /* We were guessing that this was UTF8. Now we know it's not. Drop back to ascii. */
+ instance->state = TXT_STATE_ASCII;
+ return 0;
+ }
+
+ drop_buffered(instance, n);
+
+ return send_bytes(instance, unicode_replacement_char_as_utf8, sizeof(unicode_replacement_char_as_utf8));
+}
+
+static int
+send_utf8(txt_interp_instance_t *instance, int val)
+{
+ byte buf[4];
+ int n;
+
+ /* Finally, send the val! */
+ if (val < 0x80)
+ {
+ buf[0] = val;
+ n = 1;
+ }
+ else if (val < 0x800)
+ {
+ buf[0] = 0xC0 + (val>>6);
+ buf[1] = 0x80 + (val & 0x3F);
+ n = 2;
+ }
+ else if (val < 0x10000)
+ {
+ buf[0] = 0xE0 + (val>>12);
+ buf[1] = 0x80 + ((val>>6) & 0x3F);
+ buf[2] = 0x80 + (val & 0x3F);
+ n = 3;
+ }
+ else
+ {
+ buf[0] = 0xF0 + (val>>18);
+ buf[1] = 0x80 + ((val>>12) & 0x3F);
+ buf[2] = 0x80 + ((val>>6) & 0x3F);
+ buf[3] = 0x80 + (val & 0x3F);
+ n = 4;
+ }
+ return send_bytes(instance, buf, n);
+}
+
+/* All our actual codepoints should flow through here. So this is where
+ * we do the housekeeping. */
+static int
+send_codepoint(txt_interp_instance_t *instance, int val)
+{
+ int code;
+
+#ifdef DEBUG_CODEPOINTS
+ dprintf3("Sending codepoint %d (%x) %c\n", val, val, val >= 32 && val <= 255 && val != 127 ? val : '.');
+#endif
+
+ /* Tidy up whatever mess of CR/LF we are passed. */
+ if (val == '\r')
+ {
+ /* If we've got a CR and we've just had a LF, swallow this. */
+ if (instance->just_had_lf)
+ {
+ instance->just_had_lf = 0;
+ return 0;
+ }
+ instance->just_had_cr = 1;
+ val = '\n';
+ }
+ else if (val == '\n')
+ {
+ /* If we've got a LF and we've just had a CR, swallow this. */
+ if (instance->just_had_cr)
+ {
+ instance->just_had_cr = 0;
+ return 0;
+ }
+ instance->just_had_lf = 1;
+ }
+ else
+ {
+ instance->just_had_cr = 0;
+ instance->just_had_lf = 0;
+ }
+
+ /* Keep track of what column we're at to so we can do tab handling. */
+ if (val == '\n')
+ {
+ instance->col = 0;
+ code = send_utf8(instance, '\n');
+ if (code < 0)
+ return code;
+ return send_utf8(instance, '\r');
+ }
+ if (val == '\t')
+ {
+ int spaces = 8 - (instance->col & 7);
+ while (spaces--)
+ {
+ int code = send_utf8(instance, ' ');
+ if (code < 0)
+ return code;
+ instance->col++;
+ }
+ return 0;
+ }
+ instance->col++;
+
+#if 0
+ /* No need for this as PCL line wrapping works for us. If PCL ever
+ * decides to wrap at a number of columns that aren't a multiple of
+ * 8 then we'll need to do it manually again!. */
+ if (instance->col == 80)
+ {
+ instance->col = 0;
+ code = send_utf8(instance, '\n');
+ if (code < 0)
+ return code;
+ return send_utf8(instance, '\r');
+ }
+#endif
+
+ return send_utf8(instance, val);
+}
+
+static int
+process_block(txt_interp_instance_t *instance, const byte *ptr, int n)
+{
+ int code;
+ byte *s = &instance->buffer[0];
+ int old_state = instance->state;
+ int val;
+
+ if (instance->detected == TXT_UNDETECTED)
+ {
+ instance->detected = identify_from_buffer(ptr, n);
+ /* If we're thinking we're ASCII, go straight there. Otherwise, we'll let the
+ * BOM detection below run its course. */
+ if (instance->detected == TXT_ASCII)
+ instance->state = TXT_STATE_ASCII;
+ }
+
+ while (n)
+ {
+ if (instance->state == old_state)
+ {
+ assert(instance->buffered < 4);
+ s[instance->buffered++] = *ptr++;
+ n--;
+ }
+ old_state = instance->state;
+
+ switch (instance->state)
+ {
+ case TXT_STATE_INIT:
+
+ if (instance->buffered == 3 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf)
+ {
+ instance->state = TXT_STATE_UTF8;
+ }
+ else if (instance->buffered == 2 && s[0] == 0xff && s[1] == 0xfe)
+ {
+ instance->state = TXT_STATE_UTF16_LE;
+ }
+ else if (instance->buffered == 2 && s[0] == 0xfe && s[1] == 0xff)
+ {
+ instance->state = TXT_STATE_UTF16_BE;
+ }
+ else if (instance->buffered >= 3)
+ {
+ /* We haven't found a BOM, try for utf8. */
+ instance->state = TXT_STATE_UTF8_MAYBE;
+ }
+
+ /* If we've recognised the BOM, then send the init string. */
+ if (instance->state != TXT_STATE_INIT)
+ {
+ code = send_pcl_init(instance);
+ if (code < 0)
+ return code;
+ }
+ break;
+ case TXT_STATE_UTF8:
+ case TXT_STATE_UTF8_MAYBE:
+ if ((s[0] & 0xF8) == 0xF0)
+ {
+ /* 3 following bytes */
+ if (instance->buffered >= 2 && (s[1] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 1);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered >= 3 && (s[2] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 2);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered == 4 && (s[3] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 3);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered == 4)
+ {
+ /* Valid encoding of 4 bytes */
+ val = ((s[0] & 0x7)<<18) | ((s[1] & 0x3f)<<12) | ((s[2] & 0x3f)<<6) | (s[3] & 0x3f);
+ drop_buffered(instance, 4);
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered != 1 && instance->buffered != 2 && instance->buffered != 3)
+ {
+ /* Should never happen. */
+ return_error(gs_error_Fatal);
+ }
+ }
+ else if ((s[0] & 0xF0) == 0xE0)
+ {
+ /* 2 following bytes */
+ if (instance->buffered >= 2 && (s[1] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 1);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered >= 3 && (s[2] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 2);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered == 3)
+ {
+ /* Valid encoding of 3 bytes */
+ val = ((s[0] & 0xF)<<12) | ((s[1] & 0x3f)<<6) | (s[2] & 0x3f);
+ drop_buffered(instance, 3);
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered != 1 && instance->buffered != 2)
+ {
+ /* Should never happen. */
+ return_error(gs_error_Fatal);
+ }
+ }
+ else if ((s[0] & 0xE0) == 0xC0)
+ {
+ /* 1 following bytes */
+ if (instance->buffered >= 2 && (s[1] & 0xC0) != 0x80)
+ {
+ code = send_urc(instance, 1);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered == 2)
+ {
+ /* Valid encoding of 2 bytes */
+ val = ((s[0] & 0x1F)<<6) | (s[1] & 0x3f);
+ drop_buffered(instance, 2);
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ }
+ else if (instance->buffered != 1)
+ {
+ /* Should never happen. */
+ return_error(gs_error_Fatal);
+ }
+ }
+ else if ((s[0] & 0xC0) == 0x80)
+ {
+ /* A continuation byte at the start. Should never see this. */
+ code = send_urc(instance, 1);
+ if (code < 0)
+ return code;
+ }
+ else if (s[0] < 0x80)
+ {
+ /* Simple byte. */
+ val = s[0];
+ drop_buffered(instance, 1);
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ }
+ else
+ {
+ /* Bytes we should never see in a UTF-8 file! (0xf8-0xff) */
+ code = send_urc(instance, 1);
+ if (code < 0)
+ return code;
+ }
+ break;
+ case TXT_STATE_UTF16_LE:
+ if (instance->buffered < 2)
+ break;
+ if (s[1] >= 0xD8 && s[1] < 0xDC)
+ {
+ /* High surrogate */
+ if (instance->buffered < 4)
+ break;
+ if (s[3] < 0xDC || s[3] > 0xDF)
+ {
+ /* Not followed by a low surrogate! Ignore the high surrogate. */
+ code = send_urc(instance, 2);
+ if (code < 0)
+ return code;
+ }
+ val = (((s[0] | (s[1]<<8)) - 0xdc00)<<10) + (s[2] | (s[3]<<8)) - 0xdc00 + 0x10000;
+ drop_buffered(instance, 4);
+ }
+ else
+ {
+ val = s[0] | (s[1]<<8);
+ drop_buffered(instance, 2);
+ }
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ break;
+ case TXT_STATE_UTF16_BE:
+ if (instance->buffered < 2)
+ break;
+ if (s[0] >= 0xD8 && s[0] < 0xDC)
+ {
+ /* High surrogate */
+ if (instance->buffered < 4)
+ break;
+ if (s[2] < 0xDC || s[2] > 0xDF)
+ {
+ /* Not followed by a low surrogate! Ignore the high surrogate. */
+ code = send_urc(instance, 2);
+ if (code < 0)
+ return code;
+ }
+ val = (((s[1] | (s[0]<<8)) - 0xdc00)<<10) + (s[3] | (s[2]<<8)) - 0xdc00 + 0x10000;
+ drop_buffered(instance, 4);
+ }
+ else
+ {
+ val = s[1] | (s[0]<<8);
+ drop_buffered(instance, 2);
+ }
+ code = send_codepoint(instance, val);
+ if (code < 0)
+ return code;
+ break;
+ case TXT_STATE_ASCII:
+ do
+ {
+ code = send_codepoint(instance, s[0]);
+ if (code < 0)
+ return code;
+ drop_buffered(instance, 1);
+ }
+ while (instance->buffered > 0);
+ break;
+ default:
+ return_error(gs_error_Fatal);
+ }
+ }
+ return 0;
+}
+
+/* Parse an entire random access file */
+#if 0
+static int
+txt_impl_process_file(pl_interp_implementation_t *impl, const char *filename)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+ int code, code1;
+ gp_file *file;
+
+ file = gp_fopen(instance->memory, filename, "rb");
+ if (file == 0)
+ return_error(gs_error_ioerror);
+
+ instance->sub = pl_main_get_pcl_instance(instance->memory);
+
+ code = pl_init_job(instance->sub, instance->device);
+ if (code >= 0)
+ {
+ code = pl_process_file(instance->sub, filename);
+ }
+
+ code1 = pl_dnit_job(instance->sub);
+ if (code >= 0)
+ code = code1;
+
+ gp_fclose(file);
+
+ return code;
+}
+#endif
+
+/* Do any setup for parser per-cursor */
+static int /* ret 0 or +ve if ok, else -ve error code */
+txt_impl_process_begin(pl_interp_implementation_t * impl)
+{
+ return 0;
+}
+
+/* Parse a cursor-full of data */
+static int
+txt_impl_process(pl_interp_implementation_t *impl, stream_cursor_read *cursor)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+ int avail;
+ int code;
+
+ avail = cursor->limit - cursor->ptr;
+ code = process_block(instance, cursor->ptr + 1, avail);
+ cursor->ptr = cursor->limit;
+
+ return code;
+}
+
+static int /* ret 0 or +ve if ok, else -ve error code */
+txt_impl_process_end(pl_interp_implementation_t * impl)
+{
+ return 0;
+}
+
+/* Skip to end of job.
+ * Return 1 if done, 0 ok but EOJ not found, else negative error code.
+ */
+static int
+txt_impl_flush_to_eoj(pl_interp_implementation_t *impl, stream_cursor_read *pcursor)
+{
+ /* assume SO files cannot be pjl embedded */
+ pcursor->ptr = pcursor->limit;
+ return 0;
+}
+
+/* Parser action for end-of-file */
+static int
+txt_impl_process_eof(pl_interp_implementation_t *impl)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+
+ if (instance->sub)
+ return pl_process_eof(instance->sub);
+
+ return 0;
+}
+
+/* Report any errors after running a job */
+static int
+txt_impl_report_errors(pl_interp_implementation_t *impl,
+ int code, /* prev termination status */
+ long file_position, /* file position of error, -1 if unknown */
+ bool force_to_cout /* force errors to cout */
+ )
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+ int ret = 0;
+
+ if (instance->sub)
+ ret = pl_report_errors(instance->sub, code, file_position, force_to_cout);
+
+ return ret;
+}
+
+/* Wrap up interp instance after a "job" */
+static int
+txt_impl_dnit_job(pl_interp_implementation_t *impl)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+ int code = 0;
+
+ if (instance->sub)
+ code = pl_dnit_job(instance->sub);
+ instance->sub = NULL;
+ instance->device = NULL;
+
+ return code;
+}
+
+/* Deallocate a interpreter instance */
+static int
+txt_impl_deallocate_interp_instance(pl_interp_implementation_t *impl)
+{
+ txt_interp_instance_t *instance = impl->interp_client_data;
+
+ gs_free_object(instance->memory, instance, "so_impl_deallocate_interp_instance");
+
+ return 0;
+}
+
+/* Parser implementation descriptor */
+pl_interp_implementation_t txt_implementation =
+{
+ txt_impl_characteristics,
+ txt_impl_allocate_interp_instance,
+ NULL, /* get_device_memory */
+ NULL, /* set_param */
+ NULL, /* add_path */
+ NULL, /* post_args_init */
+ txt_impl_init_job,
+ NULL, /* run_prefix_commands */
+ NULL, /* txt_impl_process_file, */
+ txt_impl_process_begin,
+ txt_impl_process,
+ txt_impl_process_end,
+ txt_impl_flush_to_eoj,
+ txt_impl_process_eof,
+ txt_impl_report_errors,
+ txt_impl_dnit_job,
+ txt_impl_deallocate_interp_instance,
+ NULL,
+};