summaryrefslogtreecommitdiff
path: root/pdf/pdf_doc.c
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/pdf_doc.c')
-rw-r--r--pdf/pdf_doc.c83
1 files changed, 82 insertions, 1 deletions
diff --git a/pdf/pdf_doc.c b/pdf/pdf_doc.c
index 1658941a5..6e236456a 100644
--- a/pdf/pdf_doc.c
+++ b/pdf/pdf_doc.c
@@ -348,7 +348,8 @@ error:
int pdfi_read_Pages(pdf_context *ctx)
{
pdf_obj *o, *o1;
- int code;
+ pdf_array *a = NULL;
+ int code, pagecount = 0;
double d;
if (ctx->args.pdfdebug)
@@ -426,6 +427,86 @@ int pdfi_read_Pages(pdf_context *ctx)
ctx->num_pages = (int)floor(d);
}
+ /* A simple confidence check in the value of Count. We only do this because
+ * the OSS-fuzz tool keeps on coming up with files that time out because the
+ * initial Count is insanely huge, and we spend much time trying to find
+ * millions of pages which don't exist.
+ */
+ code = pdfi_dict_knownget_type(ctx, (pdf_dict *)o1, "Kids", PDF_ARRAY, (pdf_obj **)&a);
+ if (code == 0)
+ code = gs_note_error(gs_error_undefined);
+ if (code < 0) {
+ pdfi_countdown(o1);
+ return code;
+ }
+
+ /* Firstly check if the Kids array has enough nodes, in which case it's
+ * probably flat (the common case)
+ */
+ if (a->size != ctx->num_pages) {
+ int i = 0;
+ pdf_obj *p = NULL, *p1 = NULL;
+ pdf_num *c = NULL;
+
+ /* Either its not a flat tree, or the top node /Count is incorrect.
+ * Get each entry in the Kids array in turn and total the /Count of
+ * each node and add any leaf nodes.
+ */
+ for (i=0;i < a->size; i++) {
+ code = pdfi_array_get(ctx, a, i, &p);
+ if (code < 0)
+ continue;
+ if (pdfi_type_of(p) != PDF_DICT) {
+ pdfi_countdown(p);
+ p = NULL;
+ continue;
+ }
+ code = pdfi_dict_knownget_type(ctx, (pdf_dict *)p, "Type", PDF_NAME, (pdf_obj **)&p1);
+ if (code <= 0) {
+ pdfi_countdown(p);
+ p = NULL;
+ continue;
+ }
+ if (pdfi_name_is((pdf_name *)p1, "Page")) {
+ pagecount++;
+ } else {
+ if (pdfi_name_is((pdf_name *)p1, "Pages")) {
+ code = pdfi_dict_knownget(ctx, (pdf_dict *)o1, "Count", (pdf_obj **)&c);
+ if (code >= 0) {
+ if (pdfi_type_of(c) == PDF_INT)
+ pagecount += c->value.i;
+ if (pdfi_type_of(c) == PDF_REAL)
+ pagecount += (int)c->value.d;
+ pdfi_countdown(c);
+ c = NULL;
+ }
+ }
+ }
+ pdfi_countdown(p1);
+ p1 = NULL;
+ pdfi_countdown(p);
+ p = NULL;
+ }
+ } else
+ pagecount = a->size;
+
+ pdfi_countdown(a);
+
+ /* If the count of the top level of the tree doesn't match the /Count
+ * of the root node then something is wrong. We could abort right now
+ * and will if this continues to be a problem, but initially let's assume
+ * the count of the top level is correct and the root node /Count is wrong.
+ * This will allow us to recover if only the root /Count gets corrupted.
+ * In future we could also try validating the entire tree at this point,
+ * though I suspect that's pointless; if the tree is corrupted we aren't
+ * likely to get much that's usable from it.
+ */
+ if (pagecount != ctx->num_pages) {
+ ctx->num_pages = pagecount;
+ code = pdfi_dict_put_int(ctx, (pdf_dict *)o1, "Count", ctx->num_pages);
+ pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGECOUNT, "pdfi_read_Pages", NULL);
+ }
+
/* We don't pdfi_countdown(o1) now, because we've transferred our
* reference to the pointer in the pdf_context structure.
*/