summaryrefslogtreecommitdiff
path: root/pdf
diff options
context:
space:
mode:
authorKen Sharp <ken.sharp@artifex.com>2023-04-20 15:52:59 +0100
committerKen Sharp <ken.sharp@artifex.com>2023-04-20 15:52:59 +0100
commit1b160b94329d28f5c0b7bc682abd6335175bd50e (patch)
treedbbb280648748af446da38eb73b0879669a734bd /pdf
parent7c8ea1623b22247145a438ab2bf081b697e5dc19 (diff)
downloadghostpdl-1b160b94329d28f5c0b7bc682abd6335175bd50e.tar.gz
GhostPDF - fix Portfolio PDF with pdfwrite
No file or bug report for this; the customer requested the files be kept private. However, any PDF Collection (Portfolio) file will show the problem. GhostPDF supports preserving embedded files from the input, but when we are processing a PDF Collection we don't want to do that, because in this case we run each of the embedded files individually. If we copy the EmbeddedFiles as well, then we end up duplicating them in the output. So, when processing EmbeddedFiles, check the Catalog to see if there is a /Collection key; if there is, then stop processing EmbeddedFiles. The customer also pointed out there was no way to avoid embedding any EmbeddedFiles from the input, so additionally add a new switch -dPreserveEmbeddedFiles to control this. While we're doing that, add one to control the preservation of 'DOCVIEW' (PageMode, PageLayout, OpenAction) as well, -dPreserveDocView. This then leads on to preventing the EmbeddedFiles in a PDF Collection from writing their DocView information. If we let them do that, then we end up opening the file incorrectly. To facilitate similar changes in the future, I've rejigged the way .PDFInit works, so that it calls a helper function to read any interpreter parameters and applies them to the PDF context. I've also added a new PostScript operator '.PDFSetParams' which takes a PDF context and a dictionary of key/value pairs which it applies to the context. Sadly, I can't actually use that for the docview control, because the PDF initialisation is what processes the document, so changing it afterwards is no help. So I've altered runpdfbegin to call a new function runpdfbegin_with_params and pass an empty dictionary. That then allows me to call runpdfbegin_with_params from the PDF Collection processing, and turn off PreserveDocView. So, in summary: new controls PreserveDocView and PreserveEmbeddedFiles, and a new function .PDFSetParams to allow us to alter the PDF interpreter parameters after .PDFInit is executed.
PDF Collections no longer embed duplicate files.
Diffstat (limited to 'pdf')
-rw-r--r--pdf/ghostpdf.c2
-rw-r--r--pdf/ghostpdf.h2
-rw-r--r--pdf/pdf_doc.c33
-rw-r--r--pdf/pdftop.c10
4 files changed, 35 insertions, 12 deletions
diff --git a/pdf/ghostpdf.c b/pdf/ghostpdf.c
index 5adac672c..6d101c7b4 100644
--- a/pdf/ghostpdf.c
+++ b/pdf/ghostpdf.c
@@ -1820,6 +1820,8 @@ pdf_context *pdfi_create_context(gs_memory_t *mem)
/* Setup some flags that don't default to 'false' */
ctx->args.showannots = true;
ctx->args.preserveannots = true;
+ ctx->args.preserveembeddedfiles = true;
+ ctx->args.preservedocview = true;
/* NOTE: For testing certain annotations on cluster, might want to set this to false */
ctx->args.printed = false; /* True if OutputFile is set, false otherwise see pdftop.c, pdf_impl_set_param() */
diff --git a/pdf/ghostpdf.h b/pdf/ghostpdf.h
index acf6b0d72..8c91ebcf6 100644
--- a/pdf/ghostpdf.h
+++ b/pdf/ghostpdf.h
@@ -137,6 +137,8 @@ typedef struct cmd_args_s {
bool preserveannots;
char **preserveannottypes; /* Null terminated array of strings, NULL if none */
bool preservemarkedcontent;
+ bool preserveembeddedfiles;
+ bool preservedocview;
bool nouserunit;
bool renderttnotdef;
bool pdfinfo;
diff --git a/pdf/pdf_doc.c b/pdf/pdf_doc.c
index ca32e55aa..272703dfa 100644
--- a/pdf/pdf_doc.c
+++ b/pdf/pdf_doc.c
@@ -1602,6 +1602,13 @@ static int pdfi_doc_EmbeddedFiles(pdf_context *ctx)
pdf_array *Names_array = NULL;
pdf_array *Kids = NULL;
+ code = pdfi_dict_knownget_type(ctx, ctx->Root, "Collection", PDF_DICT, (pdf_obj **)&Names);
+ if (code < 0) goto exit;
+ if (code > 0) {
+ code = 0;
+ goto exit;
+ }
+
code = pdfi_dict_knownget_type(ctx, ctx->Root, "Names", PDF_DICT, (pdf_obj **)&Names);
if (code <= 0) goto exit;
@@ -1796,8 +1803,6 @@ int pdfi_doc_trailer(pdf_context *ctx)
}
if (ctx->device_state.writepdfmarks) {
- code = pdfi_doc_view(ctx);
-
/* Handle Outlines */
code = pdfi_doc_Outlines(ctx);
if (code < 0) {
@@ -1807,11 +1812,13 @@ int pdfi_doc_trailer(pdf_context *ctx)
}
/* Handle Docview pdfmark stuff */
- code = pdfi_doc_view(ctx);
- if (code < 0) {
- pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_VIEW, "pdfi_doc_view", NULL);
- if (ctx->args.pdfstoponerror)
- goto exit;
+ if (ctx->args.preservedocview) {
+ code = pdfi_doc_view(ctx);
+ if (code < 0) {
+ pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_VIEW, "pdfi_doc_view", NULL);
+ if (ctx->args.pdfstoponerror)
+ goto exit;
+ }
}
/* Handle Info */
@@ -1828,11 +1835,13 @@ int pdfi_doc_trailer(pdf_context *ctx)
/* Handle EmbeddedFiles */
/* TODO: add a configuration option to embed or omit */
- code = pdfi_doc_EmbeddedFiles(ctx);
- if (code < 0) {
- pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_EMBEDDEDFILES, "pdfi_doc_trailer", NULL);
- if (ctx->args.pdfstoponerror)
- goto exit;
+ if (ctx->args.preserveembeddedfiles) {
+ code = pdfi_doc_EmbeddedFiles(ctx);
+ if (code < 0) {
+ pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_EMBEDDEDFILES, "pdfi_doc_trailer", NULL);
+ if (ctx->args.pdfstoponerror)
+ goto exit;
+ }
}
}
diff --git a/pdf/pdftop.c b/pdf/pdftop.c
index d80f9198c..10ea92c43 100644
--- a/pdf/pdftop.c
+++ b/pdf/pdftop.c
@@ -632,6 +632,16 @@ pdf_impl_set_param(pl_interp_implementation_t *impl,
if (code < 0)
return code;
}
+ if (argis(param, "PreserveEmbeddedFiles")) {
+ code = plist_value_get_bool(&pvalue, &ctx->args.preserveembeddedfiles);
+ if (code < 0)
+ return code;
+ }
+ if (argis(param, "PreserveDocView")) {
+ code = plist_value_get_bool(&pvalue, &ctx->args.preservedocview);
+ if (code < 0)
+ return code;
+ }
if (argis(param, "NoUserUnit")) {
code = plist_value_get_bool(&pvalue, &ctx->args.nouserunit);
if (code < 0)