summary | refs | log | tree | commit | diff
path: root/pdf
diff options
context:
space:
mode:
author Robin Watts <Robin.Watts@artifex.com> 2022-02-28 15:14:45 +0000
committer Robin Watts <Robin.Watts@artifex.com> 2022-05-05 14:48:01 +0100
commit 37bfab6a555c0de04d72d7b2ceefc3018c40fb55 (patch)
tree 827dd57ab8ecbe48b84e94ed7bed08d231e5792a /pdf
parent c20f3914daf63feee4cc1df9bf766b8045095f22 (diff)
download ghostpdl-37bfab6a555c0de04d72d7b2ceefc3018c40fb55.tar.gz
Add pdfi_read_bare_keyword function.
This function behaves exactly like reading a token, except that it does not allocate a pdf_obj for the keyword; it just returns an enum value. This saves allocating and deallocating keyword objects. There are still places in the code that read keywords as objects, which is why pdf_keyword still exists. These will be dealt with in future commits.
Diffstat (limited to 'pdf')
-rw-r--r-- pdf/pdf_deref.c 69
-rw-r--r-- pdf/pdf_int.c 51
-rw-r--r-- pdf/pdf_int.h 1
-rw-r--r-- pdf/pdf_repair.c 27
-rw-r--r-- pdf/pdf_xref.c 30
5 files changed, 91 insertions, 87 deletions
diff --git a/pdf/pdf_deref.c b/pdf/pdf_deref.c
index dc27074fb..ac93800af 100644
--- a/pdf/pdf_deref.c
+++ b/pdf/pdf_deref.c
@@ -200,7 +200,6 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
{
int code = 0;
int64_t i;
- pdf_keyword *keyword = NULL;
pdf_dict *dict = NULL;
gs_offset_t offset;
pdf_stream *stream_obj = NULL;
@@ -305,8 +304,8 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
stream_obj->Length = 0;
stream_obj->length_valid = false;
- code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen);
- if (code < 0 || pdfi_count_stack(ctx) < 2) {
+ code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
+ if (code == 0) {
char extra_info[gp_file_name_sizeof];
gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
@@ -318,34 +317,27 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
pdfi_countdown(stream_obj); /* get rid of extra ref */
return code;
}
- }
- else {
- if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) {
- char extra_info[gp_file_name_sizeof];
+ } else if (code < 0) {
+ char extra_info[gp_file_name_sizeof];
- gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
- pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info);
- } else {
- keyword = ((pdf_keyword *)ctx->stack_top[-1]);
- if (keyword->key != TOKEN_ENDSTREAM) {
- char extra_info[gp_file_name_sizeof];
-
- gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
- pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
- } else {
- /* Cache the Length in the stream object and mark it valid */
- stream_obj->Length = i;
- stream_obj->length_valid = true;
- }
- }
- pdfi_pop(ctx, 1);
+ gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
+ pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info);
+ } else if (code != TOKEN_ENDSTREAM) {
+ char extra_info[gp_file_name_sizeof];
+
+ gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
+ pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
+ } else {
+ /* Cache the Length in the stream object and mark it valid */
+ stream_obj->Length = i;
+ stream_obj->length_valid = true;
}
}
/* If we failed to find a valid object, or the object wasn't a keyword, or the
* keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
* Length for streams if we have encrypted files, because we must install a
- * SubFileDecode filter iwth a Length (EODString is incompatible with AES encryption)
+ * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
* Rather than mess about checking for encryption, we'll choose to just correctly
* calculate the Length of all streams. Although this takes time, it will only
* happen for files which are invalid.
@@ -393,7 +385,7 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
return 0;
}
- code = pdfi_read_token(ctx, ctx->main_stream, objnum, gen);
+ code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
if (code < 0) {
pdfi_countdown(stream_obj); /* get rid of extra ref */
if (ctx->args.pdfstoponerror)
@@ -406,14 +398,13 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
return 0;
}
- if (pdfi_count_stack(ctx) < 2) {
+ if (code == 0) {
pdfi_countdown(stream_obj); /* get rid of extra ref */
return_error(gs_error_stackunderflow);
}
- if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) {
+ if (code != TOKEN_ENDOBJ) {
pdfi_countdown(stream_obj); /* get rid of extra ref */
- pdfi_pop(ctx, 1);
if (ctx->args.pdfstoponerror)
return_error(gs_error_typecheck);
pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
@@ -424,12 +415,6 @@ static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_
}
pdfi_countdown(stream_obj); /* get rid of extra ref */
- keyword = ((pdf_keyword *)ctx->stack_top[-1]);
- if (keyword->key != TOKEN_ENDOBJ) {
- pdfi_pop(ctx, 2);
- return_error(gs_error_typecheck);
- }
- pdfi_pop(ctx, 1);
return 0;
}
@@ -535,9 +520,8 @@ int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
{
- int code = 0, stack_size = pdfi_count_stack(ctx);
+ int code = 0;
int objnum = 0, gen = 0;
- pdf_keyword *keyword = NULL;
/* An object consists of 'num gen obj' followed by a token, follwed by an endobj
* A stream dictionary might have a 'stream' instead of an 'endobj', in which case we
@@ -556,21 +540,14 @@ static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t strea
if (code == 0)
return_error(gs_error_syntaxerror);
- code = pdfi_read_token(ctx, s, 0, 0);
+ code = pdfi_read_bare_keyword(ctx, s);
if (code < 0)
return code;
- if (stack_size >= pdfi_count_stack(ctx))
+ if (code == 0)
return gs_note_error(gs_error_ioerror);
- if (((pdf_obj *)ctx->stack_top[-1])->type != PDF_KEYWORD) {
- pdfi_pop(ctx, 1);
- return_error(gs_error_typecheck);
- }
- keyword = ((pdf_keyword *)ctx->stack_top[-1]);
- if (keyword->key != TOKEN_OBJ) {
- pdfi_pop(ctx, 1);
+ if (code != TOKEN_OBJ) {
return_error(gs_error_syntaxerror);
}
- pdfi_pop(ctx, 1);
return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
}
diff --git a/pdf/pdf_int.c b/pdf/pdf_int.c
index 53167b6be..d4605d219 100644
--- a/pdf/pdf_int.c
+++ b/pdf/pdf_int.c
@@ -36,6 +36,7 @@
#include "pdf_trans.h"
#include "pdf_optcontent.h"
#include "pdf_sec.h"
+#include <stdlib.h>
#include "gsstate.h" /* for gs_gstate_free */
@@ -783,6 +784,56 @@ int pdfi_skip_comment(pdf_context *ctx, pdf_c_stream *s)
return 0;
}
+#define PARAM1(A) # A,
+#define PARAM2(A,B) A,
+static const char pdf_token_strings[][10] = {
+#include "pdf_tokens.h"
+};
+
+#define nelems(A) (sizeof(A)/sizeof(A[0]))
+
+typedef int (*bsearch_comparator)(const void *, const void *);
+
+int pdfi_read_bare_keyword(pdf_context *ctx, pdf_c_stream *s)
+{
+ byte Buffer[256];
+ int index = 0;
+ int c;
+ void *t;
+
+ pdfi_skip_white(ctx, s);
+
+ do {
+ c = pdfi_read_byte(ctx, s);
+ if (c < 0)
+ break;
+
+ if (iswhite(c) || isdelimiter(c)) {
+ pdfi_unread_byte(ctx, s, (byte)c);
+ break;
+ }
+ Buffer[index] = (byte)c;
+ index++;
+ } while (index < 255);
+
+ if (index >= 255 || index == 0) {
+ if (ctx->args.pdfstoponerror)
+ return_error(gs_error_syntaxerror);
+ return TOKEN_INVALID_KEY;
+ }
+
+ Buffer[index] = 0x00;
+ t = bsearch((const void *)Buffer,
+ (const void *)pdf_token_strings[TOKEN_INVALID_KEY+1],
+ nelems(pdf_token_strings)-(TOKEN_INVALID_KEY+1),
+ sizeof(pdf_token_strings[0]),
+ (bsearch_comparator)&strcmp);
+ if (t == NULL)
+ return TOKEN_INVALID_KEY;
+
+ return (((const char *)t) - pdf_token_strings[0]) / sizeof(pdf_token_strings[0]);
+}
+
/* This function is slightly misnamed, for some keywords we do
* indeed read the keyword and return a PDF_KEYWORD object, but
* for null, true, false and R we create an appropriate object
diff --git a/pdf/pdf_int.h b/pdf/pdf_int.h
index a2ab28b3f..4cf1dcb64 100644
--- a/pdf/pdf_int.h
+++ b/pdf/pdf_int.h
@@ -30,6 +30,7 @@ int pdfi_name_alloc(pdf_context *ctx, byte *key, uint32_t size, pdf_obj **o);
int pdfi_read_dict(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_num, uint32_t indirect_gen);
int pdfi_read_bare_int(pdf_context *ctx, pdf_c_stream *s, int *parsed_int);
+int pdfi_read_bare_keyword(pdf_context *ctx, pdf_c_stream *s);
void local_save_stream_state(pdf_context *ctx, stream_save *local_save);
void local_restore_stream_state(pdf_context *ctx, stream_save *local_save);
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 7e02dcb9a..cc44e13d7 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -245,27 +245,16 @@ int pdfi_repair_file(pdf_context *ctx)
index = 0;
} while (index < 9);
do {
- code = pdfi_read_token(ctx, ctx->main_stream, 0, 0);
- if (code < 0) {
- if (code != gs_error_VMerror && code != gs_error_ioerror)
- continue;
+ code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
+ if (code == gs_error_VMerror || code == gs_error_ioerror)
goto exit;
+ if (code == TOKEN_ENDOBJ) {
+ code = pdfi_repair_add_object(ctx, object_num, generation_num, offset);
+ if (code == gs_error_VMerror || code == gs_error_ioerror)
+ goto exit;
+ break;
}
- if (code > 0) {
- if (ctx->stack_top[-1]->type == PDF_KEYWORD){
- pdf_keyword *k = (pdf_keyword *)ctx->stack_top[-1];
- if (k->key == TOKEN_ENDOBJ) {
- code = pdfi_repair_add_object(ctx, object_num, generation_num, offset);
- if (code < 0) {
- if (code != gs_error_VMerror && code != gs_error_ioerror)
- break;
- goto exit;
- }
- break;
- }
- }
- }
- }while(ctx->main_stream->eof == false);
+ } while(ctx->main_stream->eof == false);
pdfi_clearstack(ctx);
break;
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index e060ef7f1..71f4b76b7 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -667,7 +667,6 @@ static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char fr
static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size)
{
int code = 0, i, j;
- pdf_obj *o = NULL;
int start = 0;
int size = 0;
int64_t bytes = 0;
@@ -681,17 +680,15 @@ static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *sectio
code = pdfi_read_bare_int(ctx, ctx->main_stream, &start);
if (code < 0) {
/* Not an int, might be a keyword */
- code = pdfi_read_token(ctx, ctx->main_stream, 0, 0);
+ code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
if (code < 0)
return code;
- o = ctx->stack_top[-1];
- if (o->type == PDF_KEYWORD)
- return 0;
-
- /* element is not an integer, and not a keyword - not a valid xref */
- pdfi_pop(ctx, 1);
- return_error(gs_error_typecheck);
+ if (code != TOKEN_TRAILER) {
+ /* element is not an integer, and not a keyword - not a valid xref */
+ return_error(gs_error_typecheck);
+ }
+ return 1;
}
*section_start = start;
@@ -798,8 +795,6 @@ static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *sectio
static int read_xref(pdf_context *ctx, pdf_c_stream *s)
{
int code = 0;
- pdf_obj **o = NULL;
- pdf_keyword *k;
pdf_dict *d = NULL;
uint64_t size = 0, max_obj = 0;
int64_t num;
@@ -807,7 +802,6 @@ static int read_xref(pdf_context *ctx, pdf_c_stream *s)
do {
uint64_t section_start, section_size;
- o = ctx->stack_top;
code = read_xref_section(ctx, s, &section_start, &section_size);
if (code < 0)
return code;
@@ -815,16 +809,8 @@ static int read_xref(pdf_context *ctx, pdf_c_stream *s)
if (section_size > 0 && section_start + section_size - 1 > max_obj)
max_obj = section_start + section_size - 1;
- if (ctx->stack_top - o > 0) {
- k = (pdf_keyword *)ctx->stack_top[-1];
- if(k->type != PDF_KEYWORD || k->key != TOKEN_TRAILER)
- return_error(gs_error_syntaxerror);
- else {
- pdfi_pop(ctx, 1);
- break;
- }
- }
- } while (1);
+ /* code == 1 => read_xref_section ended with a trailer. */
+ } while (code != 1);
code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0);
if (code < 0)