Extend gpdl to cope with being fed PDFs via run_string.

More generally, we can now cope with any language implementation requesting that data fed in via run_string be buffered up and then fed in via run_file instead, so that formats that require seeking can cope. We add a new gs_error_NeedFile error code. If a run_string implementation returns this, the calling gpdl layer captures the unused incoming data into an internal "buffered_file" object. When we reach run_string_end, we then register a new filing system with gs_add_fs() to allow this buffered file to be found. We then run that file using the standard run_file mechanism. Then we remove that filing system, and free the buffered file. The only implementation that currently uses this is the PostScript one, which is amended to skip over leading whitespace and comments, looking for a PDF header. XPS already has a mechanism in it for collating data into a file using run_string, but this was broken (due to process_eof not being called). This is fixed here, but XPS still uses its own mechanism rather than gs_error_NeedFile for now.
author: Robin Watts <Robin.Watts@artifex.com> 2020-07-07 12:55:42 +0100
committer: Robin Watts <Robin.Watts@artifex.com> 2020-07-07 19:34:34 +0100
commit: db8f3a277d0ae43cf5cffea16ee1c9149d4eb3de (patch)
tree: ed15a5035595ef42661c5b05f16f5803eeab9649 /gpdl
parent: 6eb675b18b8234256bb37190fd35243eab369e36 (diff)
download: ghostpdl-db8f3a277d0ae43cf5cffea16ee1c9149d4eb3de.tar.gz
1 files changed, 59 insertions, 1 deletions
diff --git a/gpdl/psitop.c b/gpdl/psitop.c
index ce755e0b4..ed2cb4e64 100644
--- a/gpdl/psitop.c
+++ b/gpdl/psitop.c
@@ -481,9 +481,67 @@ static int
 ps_impl_process(pl_interp_implementation_t * impl, stream_cursor_read * pr)
 {
     ps_interp_instance_t *psi = (ps_interp_instance_t *)impl->interp_client_data;
-    const unsigned int len = pr->limit - pr->ptr;
+    unsigned int len;
     int code, exit_code = 0;
 
+    if (psi->bytes_fed == 0)
+    {
+        /* Skip over whitespace/comments looking for a PDF marker. */
+        while (pr->ptr < pr->limit)
+        {
+            int i;
+
+            /* Skip over whitespace (as defined in PLRM) */
+            if (pr->ptr[1] == 0 ||
+                pr->ptr[1] == 9 ||
+                pr->ptr[1] == 10 ||
+                pr->ptr[1] == 12 ||
+                pr->ptr[1] == 13 ||
+                pr->ptr[1] == 32) {
+                pr->ptr++;
+                continue;
+            }
+
+            /* If we're not starting a comment, exit. */
+            if (pr->ptr[1] != '%')
+                break;
+
+            /* If we're starting with a PDF header, swap to file mode. */
+            if (pr->limit - pr->ptr >= 8 &&
+                strncmp((const char *)&pr->ptr[2], "PDF-", 4) == 0 &&
+                (pr->ptr[6] >= '1' && pr->ptr[6] <= '9') &&
+                pr->ptr[7] == '.' &&
+                (pr->ptr[8] >= '0' && pr->ptr[8] <= '9'))
+                return_error(gs_error_NeedFile);
+
+            /* Check for a historical PDF header. */
+            if (pr->limit - pr->ptr >= 22 &&
+                strncmp((const char *)&pr->ptr[2], "!PS-Adobe-", 10) == 0 &&
+                (pr->ptr[12] >= '0' && pr->ptr[12] <= '9') &&
+                pr->ptr[13] == '.' &&
+                (pr->ptr[14] >= '0' && pr->ptr[14] <= '9') &&
+                strncmp((const char *)&pr->ptr[15], " PDF-", 5) == 0 &&
+                (pr->ptr[20] >= '0' && pr->ptr[20] <= '9') &&
+                pr->ptr[21] == '.' &&
+                (pr->ptr[22] >= '0' && pr->ptr[22] <= '9'))
+                return_error(gs_error_NeedFile);
+
+            /* Do we have a complete comment that we can skip? */
+            for (i = 1; pr->ptr + i < pr->limit; i++)
+                if (pr->ptr[i+1] == 10 || pr->ptr[i+1] == 13) {
+                    pr->ptr += i;
+                    i = 0; /* Loop again in case there are more comments. */
+                    break;
+                }
+            /* If we fall out of the loop naturally, then we hit the end
+             * of the buffer without terminating our comment. We need to
+             * abort the loop and return. */
+            if (i != 0)
+                return_error(gs_error_NeedInput);
+        }
+    }
+
+    len = pr->limit - pr->ptr;
     code = psapi_run_string_continue(psi->psapi_instance, (const char *)pr->ptr + 1, len, 0, &exit_code);
     if (exit_code == gs_error_InterpreterExit) {
         int64_t offset;
author	Robin Watts <Robin.Watts@artifex.com>	2020-07-07 12:55:42 +0100
committer	Robin Watts <Robin.Watts@artifex.com>	2020-07-07 19:34:34 +0100
commit	db8f3a277d0ae43cf5cffea16ee1c9149d4eb3de (patch)
tree	ed15a5035595ef42661c5b05f16f5803eeab9649 /gpdl
parent	6eb675b18b8234256bb37190fd35243eab369e36 (diff)
download	ghostpdl-db8f3a277d0ae43cf5cffea16ee1c9149d4eb3de.tar.gz