diff options
author | Robin Watts <Robin.Watts@artifex.com> | 2022-02-28 15:14:45 +0000 |
---|---|---|
committer | Robin Watts <Robin.Watts@artifex.com> | 2022-05-05 14:48:01 +0100 |
commit | 37bfab6a555c0de04d72d7b2ceefc3018c40fb55 (patch) | |
tree | 827dd57ab8ecbe48b84e94ed7bed08d231e5792a /pdf | |
parent | c20f3914daf63feee4cc1df9bf766b8045095f22 (diff) | |
download | ghostpdl-37bfab6a555c0de04d72d7b2ceefc3018c40fb55.tar.gz |
Add pdfi_read_bare_keyword function.
Performs exactly the same as reading a token, but doesn't actually
allocate a pdf_obj for it, just returns an enum value. Saves on
allocating/deallocating keywords.
There are still places in the code that read keywords as objects,
hence pdf_keyword still exists. These will be dealt with in future
commits.
Diffstat (limited to 'pdf')
-rw-r--r-- | pdf/pdf_deref.c | 69 | ||||
-rw-r--r-- | pdf/pdf_int.c | 51 | ||||
-rw-r--r-- | pdf/pdf_int.h | 1 | ||||
-rw-r--r-- | pdf/pdf_repair.c | 27 | ||||
-rw-r--r-- | pdf/pdf_xref.c | 30 |
5 files changed, 91 insertions, 87 deletions
diff --git a/pdf/pdf_deref.c b/pdf/pdf_deref.c index dc27074fb..ac93800af 100644 --- a/pdf/pdf_deref.c +++ b/pdf/pdf_deref.c @@ -200,7 +200,6 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ { int code = 0; int64_t i; - pdf_keyword *keyword = NULL; pdf_dict *dict = NULL; gs_offset_t offset; pdf_stream *stream_obj = NULL; @@ -305,8 +304,8 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ stream_obj->Length = 0; stream_obj->length_valid = false; - code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen); - if (code < 0 || pdfi_count_stack(ctx) < 2) { + code = pdfi_read_bare_keyword(ctx, ctx->main_stream); + if (code == 0) { char extra_info[gp_file_name_sizeof]; gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum); @@ -318,34 +317,27 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ pdfi_countdown(stream_obj); /* get rid of extra ref */ return code; } - } - else { - if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) { - char extra_info[gp_file_name_sizeof]; + } else if (code < 0) { + char extra_info[gp_file_name_sizeof]; - gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum); - pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info); - } else { - keyword = ((pdf_keyword *)ctx->stack_top[-1]); - if (keyword->key != TOKEN_ENDSTREAM) { - char extra_info[gp_file_name_sizeof]; - - gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); - pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); - } else { - /* Cache the Length in the stream object and mark it valid */ - stream_obj->Length = i; - stream_obj->length_valid = true; - } - } - pdfi_pop(ctx, 1); + gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end 
of stream object %u.\n", objnum); + pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info); + } else if (code != TOKEN_ENDSTREAM) { + char extra_info[gp_file_name_sizeof]; + + gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); + pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); + } else { + /* Cache the Length in the stream object and mark it valid */ + stream_obj->Length = i; + stream_obj->length_valid = true; } } /* If we failed to find a valid object, or the object wasn't a keyword, or the * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct * Length for streams if we have encrypted files, because we must install a - * SubFileDecode filter iwth a Length (EODString is incompatible with AES encryption) + * SubFileDecode filter with a Length (EODString is incompatible with AES encryption) * Rather than mess about checking for encryption, we'll choose to just correctly * calculate the Length of all streams. Although this takes time, it will only * happen for files which are invalid. 
@@ -393,7 +385,7 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ return 0; } - code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen); + code = pdfi_read_bare_keyword(ctx, ctx->main_stream); if (code < 0) { pdfi_countdown(stream_obj); /* get rid of extra ref */ if (ctx->args.pdfstoponerror) @@ -406,14 +398,13 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ return 0; } - if (pdfi_count_stack(ctx) < 2) { + if (code == 0) { pdfi_countdown(stream_obj); /* get rid of extra ref */ return_error(gs_error_stackunderflow); } - if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) { + if (code != TOKEN_ENDOBJ) { pdfi_countdown(stream_obj); /* get rid of extra ref */ - pdfi_pop(ctx, 1); if (ctx->args.pdfstoponerror) return_error(gs_error_typecheck); pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); @@ -424,12 +415,6 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_ } pdfi_countdown(stream_obj); /* get rid of extra ref */ - keyword = ((pdf_keyword *)ctx->stack_top[-1]); - if (keyword->key != TOKEN_ENDOBJ) { - pdfi_pop(ctx, 2); - return_error(gs_error_typecheck); - } - pdfi_pop(ctx, 1); return 0; } @@ -535,9 +520,8 @@ int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_ static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset) { - int code = 0, stack_size = pdfi_count_stack(ctx); + int code = 0; int objnum = 0, gen = 0; - pdf_keyword *keyword = NULL; /* An object consists of 'num gen obj' followed by a token, follwed by an endobj * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we @@ -556,21 +540,14 @@ static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t strea if (code == 0) return_error(gs_error_syntaxerror); - code = pdfi_read_token(ctx, s, 0, 0); + code = pdfi_read_bare_keyword(ctx, s); if (code < 0) return code; - if 
(stack_size >= pdfi_count_stack(ctx)) + if (code == 0) return gs_note_error(gs_error_ioerror); - if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) { - pdfi_pop(ctx, 1); - return_error(gs_error_typecheck); - } - keyword = ((pdf_keyword *)ctx->stack_top[-1]); - if (keyword->key != TOKEN_OBJ) { - pdfi_pop(ctx, 1); + if (code != TOKEN_OBJ) { return_error(gs_error_syntaxerror); } - pdfi_pop(ctx, 1); return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen); } diff --git a/pdf/pdf_int.c b/pdf/pdf_int.c index 53167b6be..d4605d219 100644 --- a/pdf/pdf_int.c +++ b/pdf/pdf_int.c @@ -36,6 +36,7 @@ #include "pdf_trans.h" #include "pdf_optcontent.h" #include "pdf_sec.h" +#include <stdlib.h> #include "gsstate.h" /* for gs_gstate_free */ @@ -783,6 +784,56 @@ int pdfi_skip_comment(pdf_context *ctx, pdf_c_stream *s) return 0; } +#define PARAM1(A) # A, +#define PARAM2(A,B) A, +static const char pdf_token_strings[][10] = { +#include "pdf_tokens.h" +}; + +#define nelems(A) (sizeof(A)/sizeof(A[0])) + +typedef int (*bsearch_comparator)(const void *, const void *); + +int pdfi_read_bare_keyword(pdf_context *ctx, pdf_c_stream *s) +{ + byte Buffer[256]; + int index = 0; + int c; + void *t; + + pdfi_skip_white(ctx, s); + + do { + c = pdfi_read_byte(ctx, s); + if (c < 0) + break; + + if (iswhite(c) || isdelimiter(c)) { + pdfi_unread_byte(ctx, s, (byte)c); + break; + } + Buffer[index] = (byte)c; + index++; + } while (index < 255); + + if (index >= 255 || index == 0) { + if (ctx->args.pdfstoponerror) + return_error(gs_error_syntaxerror); + return TOKEN_INVALID_KEY; + } + + Buffer[index] = 0x00; + t = bsearch((const void *)Buffer, + (const void *)pdf_token_strings[TOKEN_INVALID_KEY+1], + nelems(pdf_token_strings)-(TOKEN_INVALID_KEY+1), + sizeof(pdf_token_strings[0]), + (bsearch_comparator)&strcmp); + if (t == NULL) + return TOKEN_INVALID_KEY; + + return (((const char *)t) - pdf_token_strings[0]) / sizeof(pdf_token_strings[0]); +} + /* This function is slightly misnamed, for some 
keywords we do * indeed read the keyword and return a PDF_KEYWORD object, but * for null, true, false and R we create an appropriate object diff --git a/pdf/pdf_int.h b/pdf/pdf_int.h index a2ab28b3f..4cf1dcb64 100644 --- a/pdf/pdf_int.h +++ b/pdf/pdf_int.h @@ -30,6 +30,7 @@ int pdfi_name_alloc(pdf_context *ctx, byte *key, uint32_t size, pdf_obj **o); int pdfi_read_dict(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_num, uint32_t indirect_gen); int pdfi_read_bare_int(pdf_context *ctx, pdf_c_stream *s, int *parsed_int); +int pdfi_read_bare_keyword(pdf_context *ctx, pdf_c_stream *s); void local_save_stream_state(pdf_context *ctx, stream_save *local_save); void local_restore_stream_state(pdf_context *ctx, stream_save *local_save); diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c index 7e02dcb9a..cc44e13d7 100644 --- a/pdf/pdf_repair.c +++ b/pdf/pdf_repair.c @@ -245,27 +245,16 @@ int pdfi_repair_file(pdf_context *ctx) index = 0; } while (index < 9); do { - code = pdfi_read_token(ctx, ctx->main_stream, 0, 0); - if (code < 0) { - if (code != gs_error_VMerror && code != gs_error_ioerror) - continue; + code = pdfi_read_bare_keyword(ctx, ctx->main_stream); + if (code == gs_error_VMerror || code == gs_error_ioerror) goto exit; + if (code == TOKEN_ENDOBJ) { + code = pdfi_repair_add_object(ctx, object_num, generation_num, offset); + if (code == gs_error_VMerror || code == gs_error_ioerror) + goto exit; + break; } - if (code > 0) { - if (ctx->stack_top[-1]->type == PDF_KEYWORD){ - pdf_keyword *k = (pdf_keyword *)ctx->stack_top[-1]; - if (k->key == TOKEN_ENDOBJ) { - code = pdfi_repair_add_object(ctx, object_num, generation_num, offset); - if (code < 0) { - if (code != gs_error_VMerror && code != gs_error_ioerror) - break; - goto exit; - } - break; - } - } - } - }while(ctx->main_stream->eof == false); + } while(ctx->main_stream->eof == false); pdfi_clearstack(ctx); break; diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index e060ef7f1..71f4b76b7 100644 --- a/pdf/pdf_xref.c +++ 
b/pdf/pdf_xref.c @@ -667,7 +667,6 @@ static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char fr static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size) { int code = 0, i, j; - pdf_obj *o = NULL; int start = 0; int size = 0; int64_t bytes = 0; @@ -681,17 +680,15 @@ static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *sectio code = pdfi_read_bare_int(ctx, ctx->main_stream, &start); if (code < 0) { /* Not an int, might be a keyword */ - code = pdfi_read_token(ctx, ctx->main_stream, 0, 0); + code = pdfi_read_bare_keyword(ctx, ctx->main_stream); if (code < 0) return code; - o = ctx->stack_top[-1]; - if (o->type == PDF_KEYWORD) - return 0; - - /* element is not an integer, and not a keyword - not a valid xref */ - pdfi_pop(ctx, 1); - return_error(gs_error_typecheck); + if (code != TOKEN_TRAILER) { + /* element is not an integer, and not a keyword - not a valid xref */ + return_error(gs_error_typecheck); + } + return 1; } *section_start = start; @@ -798,8 +795,6 @@ static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *sectio static int read_xref(pdf_context *ctx, pdf_c_stream *s) { int code = 0; - pdf_obj **o = NULL; - pdf_keyword *k; pdf_dict *d = NULL; uint64_t size = 0, max_obj = 0; int64_t num; @@ -807,7 +802,6 @@ static int read_xref(pdf_context *ctx, pdf_c_stream *s) do { uint64_t section_start, section_size; - o = ctx->stack_top; code = read_xref_section(ctx, s, &section_start, &section_size); if (code < 0) return code; @@ -815,16 +809,8 @@ static int read_xref(pdf_context *ctx, pdf_c_stream *s) if (section_size > 0 && section_start + section_size - 1 > max_obj) max_obj = section_start + section_size - 1; - if (ctx->stack_top - o > 0) { - k = (pdf_keyword *)ctx->stack_top[-1]; - if(k->type != PDF_KEYWORD || k->key != TOKEN_TRAILER) - return_error(gs_error_syntaxerror); - else { - pdfi_pop(ctx, 1); - break; - } - } - } while (1); + /* code == 1 => 
read_xref_section ended with a trailer. */ + } while (code != 1); code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0); if (code < 0) |