GhostPDF - add minimal support for /F in streams

Bug #705627: "New PDF parser ignores /F in streams". The /F key in a stream dictionary indicates that the actual stream is to be found in an external file. This seems to me to be of limited utility; it breaks portability because the files have to be in specific locations or they cannot be located. In Ghostscript the interpreter must also be instructed to permit opening of these files. Nevertheless, as the report states, this did work (up to a point) with the old interpreter, so this commit adds the same degree of support. If the /F key is associated with a file specification string value then we will attempt to open that file and use it in place of the embedded stream. In addition, if the /F value is a file specification dictionary then we will attempt to read a file specification string (also /F) from that dictionary and, if successful, use that. The old interpreter does not appear to have supported file specification dictionaries here at all.
author: Ken Sharp <ken.sharp@artifex.com> 2022-07-05 14:33:53 +0100
committer: Ken Sharp <ken.sharp@artifex.com> 2022-07-05 14:34:07 +0100
commit: f5d3e6405c33f7c91d171624c419679cba2d70eb (patch)
tree: d58033dc5044c4ab51c770cff977b51d38efb30f /pdf/pdf_file.c
parent: 6fd0c14775db5d10563f8b0401ee9f3f6bd29310 (diff)
download: ghostpdl-f5d3e6405c33f7c91d171624c419679cba2d70eb.tar.gz
1 files changed, 99 insertions, 0 deletions
diff --git a/pdf/pdf_file.c b/pdf/pdf_file.c
index 6db45350f..fc6b0523b 100644
--- a/pdf/pdf_file.c
+++ b/pdf/pdf_file.c
@@ -1060,6 +1060,9 @@ int pdfi_filter(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *source,
     pdf_c_stream *crypt_stream = NULL, *SubFile_stream = NULL;
     pdf_string *StreamKey = NULL;
     pdf_dict *stream_dict = NULL;
+    pdf_obj *FileSpec = NULL;
+    pdf_stream *NewStream = NULL;
+    bool known = false;
 
     *new_stream = NULL;
 
@@ -1067,6 +1070,100 @@ int pdfi_filter(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *source,
     if (code < 0)
         goto error;
 
+    /* Horrifyingly, any stream dictionary can contain a file specification, which means that
+     * instead of using the stream from the PDF file we must use an external file.
+     * So much for portability!
+     * Note: We must not do this for inline images as an inline image dictionary can
+     * contain the abbreviation /F for the Filter, and an inline image is never a
+     * separate stream, it is (obviously) contained in the current stream.
+     */
+    if (!inline_image) {
+        code = pdfi_dict_known(ctx, stream_dict, "F", &known);
+        if (code >= 0 && known) {
+            pdf_obj *FS = NULL, *o = NULL;
+            pdf_dict *dict = NULL;
+            char *filename;
+            int len;
+            stream *gstream = NULL;
+
+            code = pdfi_dict_get(ctx, stream_dict, "F", &FileSpec);
+            if (code < 0)
+                goto error;
+            if (pdfi_type_of(FileSpec) == PDF_DICT) {
+                /* We don't really support FileSpec dictionaries, partly because we
+                 * don't really know which platform to use. If there is a /F string
+                 * then we will use that, just as if we had been given a string in
+                 * the first place.
+                 */
+                code = pdfi_dict_knownget(ctx, (pdf_dict *)FileSpec, "F", &FS);
+                if (code < 0) {
+                    goto error;
+                }
+                pdfi_countdown(FileSpec);
+                FileSpec = FS;
+                FS = NULL;
+            }
+            if (pdfi_type_of(FileSpec) != PDF_STRING) {
+                code = gs_note_error(gs_error_typecheck);
+                goto error;
+            }
+            /* We should now have a string with the filename (or URL). We need
+             * to open the file and create a stream, if that succeeds.
+             */
+            gstream = sfopen((const char *)((pdf_string *)FileSpec)->data, "r", ctx->memory);
+            if (gstream == NULL) {
+                emprintf1(ctx->memory, "Failed to open file %s\n", (const char *)((pdf_string *)FileSpec)->data);
+                code = gs_note_error(gs_error_ioerror);
+                goto error;
+            }
+
+            source = (pdf_c_stream *)gs_alloc_bytes(ctx->memory, sizeof(pdf_c_stream), "external stream");
+            if (source == NULL) {
+                code = gs_note_error(gs_error_VMerror);
+                goto error;
+            }
+            memset(source, 0x00, sizeof(pdf_c_stream));
+            source->s = gstream;
+
+            code = pdfi_object_alloc(ctx, PDF_STREAM, 0, (pdf_obj **)&NewStream);
+            if (code < 0)
+                goto error;
+            pdfi_countup(NewStream);
+            code = pdfi_dict_alloc(ctx, 32, &dict);
+            if (code < 0){
+                pdfi_countdown(NewStream);
+                goto error;
+            }
+            pdfi_countup(dict);
+            NewStream->stream_dict = dict;
+            code = pdfi_dict_get(ctx, stream_dict, "FFilter", &o);
+            if (code >= 0) {
+                code = pdfi_dict_put(ctx, NewStream->stream_dict, "Filter", o);
+                if (code < 0) {
+                    pdfi_countdown(NewStream);
+                    goto error;
+                }
+            }
+            code = pdfi_dict_get(ctx, stream_dict, "FPredictor", &o);
+            if (code >= 0) {
+                code = pdfi_dict_put(ctx, NewStream->stream_dict, "Predictor", o);
+                if (code < 0) {
+                    pdfi_countdown(NewStream);
+                    goto error;
+                }
+            }
+            pdfi_countup(NewStream->stream_dict);
+            NewStream->stream_offset = 0;
+            NewStream->Length = 0;
+            NewStream->length_valid = 0;
+            NewStream->stream_written = 0;
+            NewStream->is_marking = 0;
+            NewStream->parent_obj = NULL;
+            stream_obj = NewStream;
+            stream_dict = NewStream->stream_dict;
+        }
+    }
+
     /* If the file isn't encrypted, don't apply encryption. If this is an inline
      * image then its in a content stream and will already be decrypted, so don't
      * apply decryption again.
@@ -1153,7 +1250,9 @@ int pdfi_filter(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *source,
         code = pdfi_filter_no_decryption(ctx, stream_obj, source, new_stream, inline_image);
     }
 error:
+    pdfi_countdown(NewStream);
     pdfi_countdown(StreamKey);
+    pdfi_countdown(FileSpec);
     return code;
 }
author	Ken Sharp <ken.sharp@artifex.com>	2022-07-05 14:33:53 +0100
committer	Ken Sharp <ken.sharp@artifex.com>	2022-07-05 14:34:07 +0100
commit	f5d3e6405c33f7c91d171624c419679cba2d70eb (patch)
tree	d58033dc5044c4ab51c770cff977b51d38efb30f /pdf/pdf_file.c
parent	6fd0c14775db5d10563f8b0401ee9f3f6bd29310 (diff)
download	ghostpdl-f5d3e6405c33f7c91d171624c419679cba2d70eb.tar.gz