diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2009-10-29 14:59:16 +0000 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2009-10-29 14:59:16 +0000 |
commit | e70374db44db0e7db84502deb0ea34d11f6523df (patch) | |
tree | e8caedf47498509afe93fefa3456d4d918958f4c | |
parent | 83a7c1bdeaa8f7f11f2d5ad7ba8df1c61134ae8c (diff) | |
download | ghostpdl-e70374db44db0e7db84502deb0ea34d11f6523df.tar.gz |
Implement a random file access mode for the XPS parser that uses less memory.
To enable this mode, set the XPS_ZIP_SEEK environment variable,
which causes the interpreter to spool the data to a temp file
and then run the parser in the new mode.
This is going to change in a later patch to plmain that will enable
the interpreters to directly access the job files.
git-svn-id: http://svn.ghostscript.com/ghostscript/trunk@10243 a1074d23-0009-0410-80fe-cf8c14f379e6
-rw-r--r-- | xps/ghostxps.h | 93 | ||||
-rw-r--r-- | xps/xps.mak | 4 | ||||
-rw-r--r-- | xps/xpscff.c | 7 | ||||
-rw-r--r-- | xps/xpscolor.c | 4 | ||||
-rw-r--r-- | xps/xpsdoc.c | 608 | ||||
-rw-r--r-- | xps/xpsfont.c | 4 | ||||
-rw-r--r-- | xps/xpsglyphs.c | 27 | ||||
-rw-r--r-- | xps/xpsgradient.c | 2 | ||||
-rw-r--r-- | xps/xpsimage.c | 4 | ||||
-rw-r--r-- | xps/xpsmem.c | 8 | ||||
-rw-r--r-- | xps/xpspage.c | 3 | ||||
-rw-r--r-- | xps/xpspath.c | 2 | ||||
-rw-r--r-- | xps/xpspng.c | 5 | ||||
-rw-r--r-- | xps/xpsresource.c | 14 | ||||
-rw-r--r-- | xps/xpstiff.c | 49 | ||||
-rw-r--r-- | xps/xpstop.c | 138 | ||||
-rw-r--r-- | xps/xpsttf.c | 37 | ||||
-rw-r--r-- | xps/xpsxml.c | 20 | ||||
-rw-r--r-- | xps/xpszip.c | 344 | ||||
-rw-r--r-- | xps/xpszipseek.c | 514 |
20 files changed, 1187 insertions, 700 deletions
diff --git a/xps/ghostxps.h b/xps/ghostxps.h index c85787f91..9c7c878ac 100644 --- a/xps/ghostxps.h +++ b/xps/ghostxps.h @@ -19,6 +19,8 @@ #include <stdlib.h> #include <ctype.h> /* for toupper() */ +#include "gp.h" + #include "gsgc.h" #include "gstypes.h" #include "gsstate.h" @@ -28,6 +30,7 @@ #include "gsparam.h" #include "gsdevice.h" #include "scommon.h" +#include "gdebug.h" #include "gserror.h" #include "gserrors.h" #include "gspaint.h" @@ -78,13 +81,26 @@ #define noXPS_LOAD_TYPE_MAPS /* + * XPS and ZIP strings + */ + +#define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation" +#define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource" +#define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive" + +#define ZIP_LOCAL_FILE_SIG 0x04034b50 +#define ZIP_DATA_DESC_SIG 0x08074b50 +#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50 +#define ZIP_END_OF_CENTRAL_DIRECTORY_SIG 0x06054b50 + +/* * Forward declarations. */ typedef struct xps_context_s xps_context_t; +typedef struct xps_entry_s xps_entry_t; typedef struct xps_part_s xps_part_t; -typedef struct xps_type_map_s xps_type_map_t; typedef struct xps_relation_s xps_relation_t; typedef struct xps_document_s xps_document_t; typedef struct xps_page_s xps_page_t; @@ -99,10 +115,15 @@ typedef struct xps_glyph_metrics_s xps_glyph_metrics_t; * Context and memory. */ +extern int xps_zip_trace; +extern int xps_doc_trace; + +void * xps_realloc_imp(xps_context_t *ctx, void *ptr, int size, const char *func); + #define xps_alloc(ctx, size) \ ((void*)gs_alloc_bytes(ctx->memory, size, __func__)) #define xps_realloc(ctx, ptr, size) \ - gs_resize_object(ctx->memory, ptr, size, __func__) + xps_realloc_imp(ctx, ptr, size, __func__) #define xps_strdup(ctx, str) \ xps_strdup_imp(ctx, str, __func__) #define xps_free(ctx, ptr) \ @@ -134,15 +155,12 @@ void xps_hash_debug(xps_hash_table_t *table); * Packages, parts and relations. 
*/ -int xps_process_data(xps_context_t *ctx, stream_cursor_read *buf); -int xps_process_part(xps_context_t *ctx, xps_part_t *part); +/* Process all of a seekable file at once */ +int xps_process_file(xps_context_t *ctx, FILE *file); -struct xps_type_map_s -{ - char *name; - char *type; - xps_type_map_t *next; -}; +/* Process one cursor full of data at a time */ +int xps_process_data(xps_context_t *ctx, stream_cursor_read *buf); +int xps_process_end_of_data(xps_context_t *ctx); struct xps_relation_s { @@ -165,13 +183,20 @@ struct xps_page_s xps_page_t *next; }; +struct xps_entry_s +{ + char *name; + int offset; + int csize; + int usize; +}; + struct xps_context_s { void *instance; gs_memory_t *memory; gs_state *pgs; gs_font_dir *fontdir; - FILE *file; gs_color_space *gray; gs_color_space *srgb; @@ -179,19 +204,24 @@ struct xps_context_s gs_color_space *cmyk; xps_hash_table_t *part_table; - xps_part_t *first_part; - xps_part_t *last_part; - - xps_type_map_t *defaults; - xps_type_map_t *overrides; + xps_part_t *part_list; char *start_part; /* fixed document sequence */ xps_document_t *first_fixdoc; /* first fixed document */ xps_document_t *last_fixdoc; /* last fixed document */ - xps_page_t *first_page; /* first page of document */ xps_page_t *last_page; /* last page of document */ - xps_page_t *next_page; /* next page to process when its resources are completed */ + + char *base_uri; /* base uri for parsing XML and resolving relative paths */ + char *part_uri; /* part uri for parsing metadata relations */ + + /* Seek mode state: */ + + FILE *file; + int zip_count; + xps_entry_t *zip_table; + + /* Feed mode state: */ unsigned int zip_state; unsigned int zip_version; @@ -204,14 +234,16 @@ struct xps_context_s z_stream zip_stream; char zip_file_name[2048]; - char *base_uri; /* base uri for parsing metadata and scanning parts for resources */ - char *state; /* temporary state for various processing */ + xps_page_t *next_page; /* next page to process when its resources 
are completed */ + xps_part_t *current_part; /* part for the current zip entry being decompressed */ + + /* Graphics context state: */ int use_transparency; /* global toggle for transparency */ /* Hack to workaround ghostscript's lack of understanding * the pdf 1.4 specification of Alpha only transparency groups. - * We have to force all colors to be white whenever we are computing + * We have to force all colors to be grayscale whenever we are computing * opacity masks. */ int opacity_only; @@ -251,24 +283,27 @@ struct xps_part_s xps_part_t *next; }; +xps_part_t *xps_read_zip_part(xps_context_t *ctx, char *name); + +void xps_part_name_from_relation_part_name(char *output, char *name); + xps_part_t *xps_new_part(xps_context_t *ctx, char *name, int capacity); xps_part_t *xps_find_part(xps_context_t *ctx, char *name); +xps_part_t *xps_read_part(xps_context_t *ctx, char *name); void xps_free_part(xps_context_t *ctx, xps_part_t *part); void xps_free_part_caches(xps_context_t *ctx, xps_part_t *part); +void xps_free_part_data(xps_context_t *ctx, xps_part_t *part); +void xps_release_part(xps_context_t *ctx, xps_part_t *part); void xps_debug_item(xps_item_t *item, int level); int xps_add_relation(xps_context_t *ctx, char *source, char *target, char *type); -char *xps_get_content_type(xps_context_t *ctx, char *partname); - -void xps_free_type_map(xps_context_t *ctx, xps_type_map_t *node); void xps_free_relations(xps_context_t *ctx, xps_relation_t *node); void xps_free_fixed_pages(xps_context_t *ctx); void xps_free_fixed_documents(xps_context_t *ctx); void xps_debug_parts(xps_context_t *ctx); -void xps_debug_type_map(xps_context_t *ctx, char *label, xps_type_map_t *node); void xps_debug_fixdocseq(xps_context_t *ctx); /* @@ -330,7 +365,7 @@ struct xps_glyph_metrics_s int xps_init_font_cache(xps_context_t *ctx); -xps_font_t *xps_new_font(xps_context_t *ctx, char *buf, int buflen, int index); +xps_font_t *xps_new_font(xps_context_t *ctx, byte *buf, int buflen, int index); void 
xps_free_font(xps_context_t *ctx, xps_font_t *font); int xps_count_font_encodings(xps_font_t *font); @@ -354,13 +389,15 @@ void xps_debug_path(xps_context_t *ctx); * XML and content. */ -xps_item_t * xps_parse_xml(xps_context_t *ctx, char *buf, int len); +xps_item_t * xps_parse_xml(xps_context_t *ctx, byte *buf, int len); xps_item_t * xps_next(xps_item_t *item); xps_item_t * xps_down(xps_item_t *item); void xps_free_item(xps_context_t *ctx, xps_item_t *item); char * xps_tag(xps_item_t *item); -char * xps_att(xps_item_t *item, const char *att); +char * xps_att(xps_item_t *item, char *att); +int xps_parse_content_relations(xps_context_t *ctx, xps_part_t *part); +int xps_parse_metadata(xps_context_t *ctx, xps_part_t *part); int xps_parse_fixed_page(xps_context_t *ctx, xps_part_t *part); int xps_parse_canvas(xps_context_t *ctx, char *base_uri, xps_resource_t *dict, xps_item_t *node); int xps_parse_path(xps_context_t *ctx, char *base_uri, xps_resource_t *dict, xps_item_t *node); diff --git a/xps/xps.mak b/xps/xps.mak index a93d4eee3..8cb71b4cc 100644 --- a/xps/xps.mak +++ b/xps/xps.mak @@ -64,6 +64,9 @@ $(XPSOBJ)xpshdp.$(OBJ): $(XPSSRC)xpshdp.c $(XPSINCLUDES) $(XPSOBJ)xpszip.$(OBJ): $(XPSSRC)xpszip.c $(XPSINCLUDES) $(XPSCCC) $(XPSSRC)xpszip.c $(XPSO_)xpszip.$(OBJ) +$(XPSOBJ)xpszipseek.$(OBJ): $(XPSSRC)xpszipseek.c $(XPSINCLUDES) + $(XPSCCC) $(XPSSRC)xpszipseek.c $(XPSO_)xpszipseek.$(OBJ) + $(XPSOBJ)xpsxml.$(OBJ): $(XPSSRC)xpsxml.c $(XPSINCLUDES) $(XPSCCC) $(XPSSRC)xpsxml.c $(XPSO_)xpsxml.$(OBJ) @@ -130,6 +133,7 @@ XPS_OBJS=\ $(XPSOBJ)xpstiff.$(OBJ) \ $(XPSOBJ)xpshdp.$(OBJ) \ $(XPSOBJ)xpszip.$(OBJ) \ + $(XPSOBJ)xpszipseek.$(OBJ) \ $(XPSOBJ)xpsxml.$(OBJ) \ $(XPSOBJ)xpsdoc.$(OBJ) \ $(XPSOBJ)xpspage.$(OBJ) \ diff --git a/xps/xpscff.c b/xps/xpscff.c index dc7836fa8..d4f932823 100644 --- a/xps/xpscff.c +++ b/xps/xpscff.c @@ -757,10 +757,9 @@ xps_cff_append(gs_state *pgs, gs_font_type1 *pt1, gs_glyph glyph, int donthint) } static int 
-xps_post_callback_build_char(gs_text_enum_t *ptextenum, gs_state *pgs, +xps_post_callback_build_char(gs_show_enum *penum, gs_state *pgs, gs_font *pfont, gs_char chr, gs_glyph glyph) { - gs_show_enum *penum = (gs_show_enum*)ptextenum; gs_font_type1 *pt1 = (gs_font_type1*)pfont; const gs_rect *pbbox; float w2[6]; @@ -874,8 +873,8 @@ xps_init_postscript_font(xps_context_t *ctx, xps_font_t *font) pt1->procs.next_char_glyph = gs_default_next_char_glyph; pt1->procs.build_char = xps_post_callback_build_char; - strcpy(pt1->font_name.chars, "PostScriptFont"); - pt1->font_name.size = strlen(pt1->font_name.chars); + strcpy((char*)pt1->font_name.chars, "PostScriptFont"); + pt1->font_name.size = strlen((char*)pt1->font_name.chars); pt1->key_name.size = 0; diff --git a/xps/xpscolor.c b/xps/xpscolor.c index 70e149bc6..605be3581 100644 --- a/xps/xpscolor.c +++ b/xps/xpscolor.c @@ -155,7 +155,7 @@ xps_parse_color(xps_context_t *ctx, char *base_uri, char *string, gs_color_space /* Find ICC colorspace part */ xps_absolute_path(partname, base_uri, profile); - part = xps_find_part(ctx, partname); + part = xps_read_part(ctx, partname); if (!part) return gs_throw1(-1, "cannot find icc profile part '%s'", partname); @@ -171,6 +171,8 @@ xps_parse_color(xps_context_t *ctx, char *base_uri, char *string, gs_color_space *csp = part->icc; #endif + xps_release_part(ctx, part); + return 0; } diff --git a/xps/xpsdoc.c b/xps/xpsdoc.c index 3a53383a7..fc5db2dbd 100644 --- a/xps/xpsdoc.c +++ b/xps/xpsdoc.c @@ -17,142 +17,193 @@ #include <expat.h> -#define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation" -#define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource" -#define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive" - -#define CT_FIXDOC "application/vnd.ms-package.xps-fixeddocument+xml" -#define CT_FIXDOCSEQ "application/vnd.ms-package.xps-fixeddocumentsequence+xml" -#define 
CT_FIXPAGE "application/vnd.ms-package.xps-fixedpage+xml" - -int xps_doc_trace = 0; - /* - * Content types are stored in two lookup tables. - * One contains Override entries, which map a part name to a type. - * The other contains Default entries, which map a file extension to a type. + * The part table stores both incomplete (interleaved) and completed parts. + * In feed mode the completed parts are buffered until they can be safely freed. + * In feed mode, the parts may also be completely empty since they are used to + * store relationships between parts that may not yet have been encountered. */ -void xps_debug_type_map(xps_context_t *ctx, char *label, xps_type_map_t *node) +void xps_debug_parts(xps_context_t *ctx) { - while (node) + xps_part_t *part = ctx->part_list; + xps_relation_t *rel; + while (part) { - dprintf3("%s name=%s type=%s\n", label, node->name, node->type); - node = node->next; + dprintf2("part '%s' size=%d\n", part->name, part->size); + for (rel = part->relations; rel; rel = rel->next) + dprintf2(" target=%s type=%s\n", rel->target, rel->type); + part = part->next; } } -static xps_type_map_t * -xps_new_type_map(xps_context_t *ctx, char *name, char *type) +xps_part_t * +xps_new_part(xps_context_t *ctx, char *name, int capacity) { - xps_type_map_t *node; + xps_part_t *part; - node = xps_alloc(ctx, sizeof(xps_type_map_t)); - if (!node) - goto cleanup; + part = xps_alloc(ctx, sizeof(xps_part_t)); + if (!part) + return NULL; - node->name = xps_strdup(ctx, name); - node->type = xps_strdup(ctx, type); - node->next = NULL; + part->name = NULL; + part->size = 0; + part->interleave = 0; + part->capacity = 0; + part->complete = 0; + part->data = NULL; + part->relations = NULL; + part->relations_complete = 0; - if (!node->name) - goto cleanup; - if (!node->type) - goto cleanup; + part->font = NULL; + part->image = NULL; + part->icc = NULL; + part->xml = NULL; - return node; + part->deobfuscated = 0; -cleanup: - if (node) + part->name = xps_strdup(ctx, 
name); + if (!part->name) { - if (node->name) - xps_free(ctx, node->name); - if (node->type) - xps_free(ctx, node->type); - xps_free(ctx, node); + xps_free(ctx, part); + return NULL; } - return NULL; + + if (capacity == 0) + capacity = 1024; + + part->size = 0; + part->capacity = capacity; + part->data = xps_alloc(ctx, part->capacity); + if (!part->data) + { + xps_free(ctx, part->name); + xps_free(ctx, part); + return NULL; + } + + part->next = NULL; + + /* add it to the list of parts */ + part->next = ctx->part_list; + ctx->part_list = part; + + /* add it to the hash table of parts */ + xps_hash_insert(ctx, ctx->part_table, part->name, part); + + return part; } void -xps_free_type_map(xps_context_t *ctx, xps_type_map_t *node) +xps_free_part_caches(xps_context_t *ctx, xps_part_t *part) { - xps_type_map_t *next; - while (node) +#if 0 + /* Can't free fonts because pdfwrite needs them alive */ + if (part->font) { - next = node->next; - xps_free(ctx, node->name); - xps_free(ctx, node->type); - xps_free(ctx, node); - node = next; + xps_free_font(ctx, part->font); + part->font = NULL; } -} -static char * -xps_lookup_type_map(xps_type_map_t *node, char *name) -{ - while (node) + if (part->icc) { - if (strcmp(node->name, name) == 0) - return node->type; - node = node->next; + xps_free_colorspace(ctx, part->icc); + part->icc = NULL; } - return NULL; -} +#endif -static void -xps_add_override(xps_context_t *ctx, char *part_name, char *content_type) -{ - xps_type_map_t *node; - if (!xps_lookup_type_map(ctx->overrides, part_name)) + if (part->image) { - node = xps_new_type_map(ctx, part_name, content_type); - if (node) - { - node->next = ctx->overrides; - ctx->overrides = node; - } + xps_free_image(ctx, part->image); + part->image = NULL; } -} -static void -xps_add_default(xps_context_t *ctx, char *extension, char *content_type) -{ - xps_type_map_t *node; - if (!xps_lookup_type_map(ctx->defaults, extension)) + if (part->xml) { - node = xps_new_type_map(ctx, extension, 
content_type); - if (node) - { - node->next = ctx->defaults; - ctx->defaults = node; - } + xps_free_item(ctx, part->xml); + part->xml = NULL; } } -char * -xps_get_content_type(xps_context_t *ctx, char *partname) +void +xps_free_part_data(xps_context_t *ctx, xps_part_t *part) { - char *extension; - char *type; + if (part->data) + xps_free(ctx, part->data); + part->data = NULL; + part->size = 0; + part->capacity = 0; + part->complete = 0; + part->deobfuscated = 0; +} - type = xps_lookup_type_map(ctx->overrides, partname); - if (type) - { - return type; - } +void +xps_release_part(xps_context_t *ctx, xps_part_t *part) +{ + /* since fonts need to live for the duration of + the job there's no point in freeing them */ + if (part->font) + return; + + /* never free the part data if we're in feed mode, + since we may need it later */ + if (ctx->file) + xps_free_part_data(ctx, part); + + /* free any parsed representations */ + xps_free_part_caches(ctx, part); +} + +void +xps_free_part(xps_context_t *ctx, xps_part_t *part) +{ + xps_free_part_caches(ctx, part); + xps_free_part_data(ctx, part); + + /* Nu-uh, can't free fonts because pdfwrite needs them alive */ + if (part->font) + return; + + if (part->name) + xps_free(ctx, part->name); + part->name = NULL; + + /* TODO: remove from context part list */ - extension = strrchr(partname, '.'); - if (extension) - extension ++; + xps_free_relations(ctx, part->relations); + xps_free(ctx, part); +} + +/* + * Lookup a part in the part table. It may be + * unloaded, partially loaded, or loaded. + */ + +xps_part_t * +xps_find_part(xps_context_t *ctx, char *name) +{ + return xps_hash_lookup(ctx->part_table, name); +} - type = xps_lookup_type_map(ctx->defaults, extension); - if (type) +/* + * Find and ensure that the contents of the part have been loaded. + * Will return NULL if used on on an incomplete or unloaded part in feed mode. 
+ */ + +xps_part_t * +xps_read_part(xps_context_t *ctx, char *name) +{ + xps_part_t *part; + part = xps_hash_lookup(ctx->part_table, name); + if (ctx->file) { - return type; + if (!part) + part = xps_read_zip_part(ctx, name); + if (part && !part->complete) + part = xps_read_zip_part(ctx, name); + return part; } - - return NULL; + return part; } /* @@ -160,16 +211,22 @@ xps_get_content_type(xps_context_t *ctx, char *partname) * is the source of the relation. */ -void xps_debug_parts(xps_context_t *ctx) +void +xps_part_name_from_relation_part_name(char *output, char *name) { - xps_part_t *part = ctx->first_part; - xps_relation_t *rel; - while (part) + char *p, *q; + strcpy(output, name); + p = strstr(output, "_rels/"); + q = strstr(name, "_rels/"); + if (p) { - dprintf2("part '%s' size=%d\n", part->name, part->size); - for (rel = part->relations; rel; rel = rel->next) - dprintf2(" target=%s type=%s\n", rel->target, rel->type); - part = part->next; + *p = 0; + strcat(output, q + 6); + } + p = strstr(output, ".rels"); + if (p) + { + *p = 0; } } @@ -211,6 +268,11 @@ xps_add_relation(xps_context_t *ctx, char *source, char *target, char *type) node->next = part->relations; part->relations = node; + if (xps_doc_trace) + { + dprintf2(" relation %s -> %s\n", source, target); + } + return 0; } @@ -229,13 +291,8 @@ xps_free_relations(xps_context_t *ctx, xps_relation_t *node) } /* - * <DocumentReference> -- fixdocseq - * <PageContent> -- fixdoc - * - * TODO: We should really look at the root StartPart relationship - * for the FixedDocumentSequence and follow the DocumentReferences - * therein for the page sequence. For now, we'll cheat and read - * any PageContent references in the order they are in the file. + * The FixedDocumentSequence and FixedDocument parts determine + * which parts correspond to actual pages, and the page order. 
*/ void xps_debug_fixdocseq(xps_context_t *ctx) @@ -377,31 +434,7 @@ xps_free_fixed_pages(xps_context_t *ctx) } /* - * Periodically free old parts and resources that - * will not be used any more. This looks at discard control - * information, and assumes that a given fixed page will - * not be drawn more than once. - */ - -static void -xps_free_used_parts(xps_context_t *ctx) -{ - /* Free parsed resources that were used on the last page */ - xps_part_t *part = ctx->first_part; - while (part) - { - xps_part_t *next = part->next; - xps_free_part_caches(ctx, part); - part = next; - } - - /* TODO: Free the data for page parts we have rendered */ - /* TODO: Free the data for parts we don't recognize */ - /* TODO: Parse DiscardControl parts to free stuff */ -} - -/* - * Parse the metadata [Content_Types.xml] and _rels/XXX.rels parts. + * Parse the metadata document structure and _rels/XXX.rels parts. * These should be parsed eagerly as they are interleaved, so the * parsing needs to be able to cope with incomplete xml. * @@ -411,75 +444,23 @@ xps_free_used_parts(xps_context_t *ctx) * * We hook up unique expat handlers for this, and ignore any expat * errors that occur. + * + * The seekable mode only parses the document structure parts, + * and ignores all other metadata. 
*/ static void -xps_part_from_relation(char *output, char *name) -{ - char *p, *q; - strcpy(output, name); - p = strstr(output, "_rels/"); - q = strstr(name, "_rels/"); - if (p) - { - *p = 0; - strcat(output, q + 6); - } - p = strstr(output, ".rels"); - if (p) - { - *p = 0; - } -} - -static void -xps_handle_metadata(void *zp, char *name, char **atts) +xps_parse_metadata_imp(void *zp, char *name, char **atts) { xps_context_t *ctx = zp; int i; - if (!strcmp(name, "Default")) - { - char *extension = NULL; - char *type = NULL; - - for (i = 0; atts[i]; i += 2) - { - if (!strcmp(atts[i], "Extension")) - extension = atts[i + 1]; - if (!strcmp(atts[i], "ContentType")) - type = atts[i + 1]; - } - - if (extension && type) - xps_add_default(ctx, extension, type); - } - - if (!strcmp(name, "Override")) - { - char *partname = NULL; - char *type = NULL; - - for (i = 0; atts[i]; i += 2) - { - if (!strcmp(atts[i], "PartName")) - partname = atts[i + 1]; - if (!strcmp(atts[i], "ContentType")) - type = atts[i + 1]; - } - - if (partname && type) - xps_add_override(ctx, partname, type); - } - if (!strcmp(name, "Relationship")) { - char srcbuf[1024]; + char realpart[1024]; char tgtbuf[1024]; - char dirbuf[1024]; char *target = NULL; char *type = NULL; - char *p; for (i = 0; atts[i]; i += 2) { @@ -491,16 +472,9 @@ xps_handle_metadata(void *zp, char *name, char **atts) if (target && type) { - xps_part_from_relation(srcbuf, ctx->last_part->name); - - strcpy(dirbuf, srcbuf); - p = strrchr(dirbuf, '/'); - if (p) - p[1] = 0; - - xps_absolute_path(tgtbuf, dirbuf, target); - - xps_add_relation(ctx, srcbuf, tgtbuf, type); + xps_part_name_from_relation_part_name(realpart, ctx->part_uri); + xps_absolute_path(tgtbuf, ctx->base_uri, target); + xps_add_relation(ctx, realpart, tgtbuf, type); } } @@ -547,8 +521,8 @@ xps_handle_metadata(void *zp, char *name, char **atts) } } -static int -xps_process_metadata(xps_context_t *ctx, xps_part_t *part) +int +xps_parse_metadata(xps_context_t *ctx, xps_part_t 
*part) { XML_Parser xp; char buf[1024]; @@ -558,9 +532,18 @@ xps_process_metadata(xps_context_t *ctx, xps_part_t *part) strcpy(buf, part->name); s = strrchr(buf, '/'); if (s) - s[1] = 0; + s[0] = 0; + + /* _rels parts are voodoo: their URI references are from + * the part they are associated with, not the actual _rels + * part being parsed. + */ + s = strstr(buf, "/_rels"); + if (s) + *s = 0; ctx->base_uri = buf; + ctx->part_uri = part->name; xp = XML_ParserCreate(NULL); if (!xp) @@ -568,19 +551,23 @@ xps_process_metadata(xps_context_t *ctx, xps_part_t *part) XML_SetUserData(xp, ctx); XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); - XML_SetStartElementHandler(xp, (XML_StartElementHandler)xps_handle_metadata); + XML_SetStartElementHandler(xp, (XML_StartElementHandler)xps_parse_metadata_imp); - (void) XML_Parse(xp, part->data, part->size, 1); + (void) XML_Parse(xp, (char*)part->data, part->size, 1); XML_ParserFree(xp); ctx->base_uri = NULL; + ctx->part_uri = NULL; return 0; } /* - * Scan FixedPage XML for required resources: + * Parse a FixedPage part and infer the required relationships. The + * relationship parts are often placed at the end of the file, so we don't want + * to rely on them. This function gets called if a FixedPage part is + * encountered and its relationship part has not been parsed yet. * * <Glyphs FontUri=... > * <ImageBrush ImageSource=... 
> @@ -615,7 +602,7 @@ xps_parse_color_relation(xps_context_t *ctx, char *string) *ep = 0; xps_absolute_path(path, ctx->base_uri, sp); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE); } } } @@ -642,7 +629,7 @@ xps_parse_image_relation(xps_context_t *ctx, char *string) *ep = 0; xps_absolute_path(path, ctx->base_uri, sp); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE); sp = ep + 1; ep = strchr(sp, '}'); @@ -651,7 +638,7 @@ xps_parse_image_relation(xps_context_t *ctx, char *string) *ep = 0; xps_absolute_path(path, ctx->base_uri, sp); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE); } } } @@ -660,7 +647,7 @@ xps_parse_image_relation(xps_context_t *ctx, char *string) { xps_absolute_path(path, ctx->base_uri, string); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE); } } @@ -686,7 +673,7 @@ xps_parse_content_relations_imp(void *zp, char *ns_name, char **atts) { xps_absolute_path(path, ctx->base_uri, atts[i+1]); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE); } } } @@ -706,7 +693,7 @@ xps_parse_content_relations_imp(void *zp, char *ns_name, char **atts) { xps_absolute_path(path, ctx->base_uri, atts[i+1]); xps_trim_url(path); - xps_add_relation(ctx, ctx->state, path, REL_REQUIRED_RESOURCE_RECURSIVE); + xps_add_relation(ctx, ctx->part_uri, path, REL_REQUIRED_RESOURCE_RECURSIVE); } } } @@ -726,7 +713,7 @@ xps_parse_content_relations_imp(void *zp, char *ns_name, char **atts) } } -static int +int xps_parse_content_relations(xps_context_t *ctx, xps_part_t 
*part) { XML_Parser xp; @@ -737,9 +724,9 @@ xps_parse_content_relations(xps_context_t *ctx, xps_part_t *part) strcpy(buf, part->name); s = strrchr(buf, '/'); if (s) - s[1] = 0; + s[0] = 0; - ctx->state = part->name; + ctx->part_uri = part->name; ctx->base_uri = buf; if (xps_doc_trace) @@ -753,192 +740,13 @@ xps_parse_content_relations(xps_context_t *ctx, xps_part_t *part) XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); XML_SetStartElementHandler(xp, (XML_StartElementHandler)xps_parse_content_relations_imp); - (void) XML_Parse(xp, part->data, part->size, 1); + (void) XML_Parse(xp, (char*)part->data, part->size, 1); XML_ParserFree(xp); - if (xps_doc_trace) - { - xps_relation_t *rel; - for (rel = part->relations; rel; rel = rel->next) - dprintf1(" relation %s\n", rel->target); - } - - ctx->state = NULL; + ctx->part_uri = NULL; ctx->base_uri = NULL; return 0; } -static int -xps_validate_resources(xps_context_t *ctx, xps_part_t *part) -{ - xps_relation_t *rel; - xps_part_t *subpart; - - for (rel = part->relations; rel; rel = rel->next) - { - if (!strcmp(rel->type, REL_REQUIRED_RESOURCE_RECURSIVE)) - { - subpart = xps_find_part(ctx, rel->target); - if (!subpart || !subpart->complete) - return 0; - if (!subpart->relations_complete) - { - xps_parse_content_relations(ctx, subpart); - subpart->relations_complete = 1; - } - if (!xps_validate_resources(ctx, subpart)) - return 0; - } - - if (!strcmp(rel->type, REL_REQUIRED_RESOURCE)) - { - subpart = xps_find_part(ctx, rel->target); - if (!subpart || !subpart->complete) - return 0; - } - } - - return 1; -} - -int -xps_process_part(xps_context_t *ctx, xps_part_t *part) -{ - xps_document_t *fixdoc; - - if (getenv("XPS_DOC_TRACE")) - xps_doc_trace = 1; - - if (xps_doc_trace && part->complete) - dprintf2("doc: found part %s %s\n", part->name, part->complete ? "" : "(piece)"); - - /* - * These two are magic Open Packaging Convention names. 
- */ - - if (strstr(part->name, "[Content_Types].xml")) - { - xps_process_metadata(ctx, part); - } - - if (strstr(part->name, "_rels/")) - { - xps_process_metadata(ctx, part); - - if (part->complete) - { - char realname[1024]; - xps_part_t *realpart; - xps_part_from_relation(realname, part->name); - realpart = xps_find_part(ctx, realname); - if (realpart) - { - realpart->relations_complete = 1; - } - } - } - - /* DiscardControl parts are not used by files in the wild, so we don't bother */ - - /* - * For the rest we need to track the relationships - * and content-types given by the previous two types. - * - * We can't do anything until we have the relationship - * for the start part. - */ - - if (!ctx->start_part) - { - xps_part_t *rootpart; - rootpart = xps_find_part(ctx, "/"); - if (rootpart) - { - xps_relation_t *rel; - for (rel = rootpart->relations; rel; rel = rel->next) - { - if (!strcmp(rel->type, REL_START_PART)) - { - xps_part_t *startpart; - - ctx->start_part = rel->target; - - if (xps_doc_trace) - dprintf1("doc: adding start part '%s'\n", ctx->start_part); - - startpart = xps_find_part(ctx, rel->target); - if (startpart) - xps_process_metadata(ctx, startpart); - } - } - } - } - - /* - * Read the start part (which is a FixedDocumentSequence) if it - * is the current part. - */ - - if (ctx->start_part) - { - if (!strcmp(part->name, ctx->start_part)) - { - xps_process_metadata(ctx, part); - } - } - - /* - * Follow the FixedDocumentSequence and parse the - * listed FixedDocuments that we have available. - */ - - for (fixdoc = ctx->first_fixdoc; fixdoc; fixdoc = fixdoc->next) - { - xps_part_t *fixdocpart = xps_find_part(ctx, fixdoc->name); - if (fixdocpart) - { - xps_process_metadata(ctx, fixdocpart); - if (!fixdocpart->complete) - break; /* incomplete fixdocpart, try parsing more later */ - } - } - - /* - * If we know which page part is next, check if we - * have all the page dependencies. If everything is - * ready: parse and render. 
- */ - - while (ctx->next_page) - { - xps_part_t *pagepart = xps_find_part(ctx, ctx->next_page->name); - if (pagepart && pagepart->complete) - { - if (!pagepart->relations_complete) - { - xps_parse_content_relations(ctx, pagepart); - pagepart->relations_complete = 1; - } - - if (xps_validate_resources(ctx, pagepart)) - { - int code = xps_parse_fixed_page(ctx, pagepart); - if (code < 0) - return code; - - ctx->next_page = ctx->next_page->next; - - xps_free_used_parts(ctx); - - continue; - } - } - - break; - } - - return 0; -} - diff --git a/xps/xpsfont.c b/xps/xpsfont.c index 8e185390c..0ddb6b265 100644 --- a/xps/xpsfont.c +++ b/xps/xpsfont.c @@ -53,7 +53,7 @@ int xps_init_font_cache(xps_context_t *ctx) } xps_font_t * -xps_new_font(xps_context_t *ctx, char *buf, int buflen, int index) +xps_new_font(xps_context_t *ctx, byte *buf, int buflen, int index) { xps_font_t *font; int code; @@ -65,7 +65,7 @@ xps_new_font(xps_context_t *ctx, char *buf, int buflen, int index) return NULL; } - font->data = (byte*)buf; + font->data = buf; font->length = buflen; font->font = NULL; diff --git a/xps/xpsglyphs.c b/xps/xpsglyphs.c index 11cf1aea7..aefc9c09b 100644 --- a/xps/xpsglyphs.c +++ b/xps/xpsglyphs.c @@ -109,18 +109,6 @@ xps_deobfuscate_font_resource(xps_context_t *ctx, xps_part_t *part) part->data[i+16] ^= key[15-i]; } - if (getenv("XPS_SAVE_FONTS")) - { - static int id = 0; - char buf[25]; - FILE *fp; - sprintf(buf, "font%d.otf", id++); - dprintf1("saving font data to %s\n", buf); - fp = fopen(buf, "wb"); - fwrite(part->data, part->size, 1, fp); - fclose(fp); - } - return 0; } @@ -497,7 +485,6 @@ xps_parse_glyphs(xps_context_t *ctx, xps_font_t *font; char partname[1024]; - char *parttype; char *subfont; gs_matrix matrix; @@ -578,23 +565,17 @@ xps_parse_glyphs(xps_context_t *ctx, subfontid = atoi(subfont + 1); *subfont = 0; } - part = xps_find_part(ctx, partname); + part = xps_read_part(ctx, partname); if (!part) return gs_throw1(-1, "cannot find font resource part '%s'", 
partname); /* deobfuscate if necessary */ if (!part->deobfuscated) { - parttype = xps_get_content_type(ctx, part->name); - if (parttype && !strcmp(parttype, "application/vnd.ms-package.obfuscated-opentype")) + if (strstr(part->name, ".odttf")) xps_deobfuscate_font_resource(ctx, part); - - /* stupid XPS files with content-types after the parts */ - if (!parttype && strstr(part->name, ".odttf")) + if (strstr(part->name, ".ODTTF")) xps_deobfuscate_font_resource(ctx, part); - if (!parttype && strstr(part->name, ".ODTTF")) - xps_deobfuscate_font_resource(ctx, part); - part->deobfuscated = 1; } @@ -698,6 +679,8 @@ xps_parse_glyphs(xps_context_t *ctx, xps_unclip(ctx, &saved_bounds); } + xps_release_part(ctx, part); + return 0; } diff --git a/xps/xpsgradient.c b/xps/xpsgradient.c index 0eed8ae99..dd7060ba3 100644 --- a/xps/xpsgradient.c +++ b/xps/xpsgradient.c @@ -262,7 +262,7 @@ xps_free_gradient_stop_function(xps_context_t *ctx, gs_function_t *func) for (i = 0; i < sparams->k; i++) { - lfunc = sparams->Functions[i]; + lfunc = (gs_function_t*) sparams->Functions[i]; /* discard const */ lparams = (gs_function_ElIn_params_t*) &lfunc->params; xps_free(ctx, (void*)lparams->Domain); xps_free(ctx, (void*)lparams->Range); diff --git a/xps/xpsimage.c b/xps/xpsimage.c index 490720147..0efe3f89e 100644 --- a/xps/xpsimage.c +++ b/xps/xpsimage.c @@ -310,7 +310,7 @@ xps_find_image_brush_source_part(xps_context_t *ctx, char *base_uri, xps_item_t profile_name, image_name); xps_absolute_path(partname, base_uri, image_name); - part = xps_find_part(ctx, partname); + part = xps_read_part(ctx, partname); if (!part) return gs_throw1(-1, "cannot find image resource part '%s'", partname); @@ -341,7 +341,7 @@ xps_parse_image_brush(xps_context_t *ctx, char *base_uri, xps_resource_t *dict, xps_parse_tiling_brush(ctx, base_uri, dict, root, xps_paint_image_brush, part->image); - /* TODO: free the image data here if the image is only used once on the page */ + xps_release_part(ctx, part); return 0; } 
diff --git a/xps/xpsmem.c b/xps/xpsmem.c index 03ec70a64..9901230d8 100644 --- a/xps/xpsmem.c +++ b/xps/xpsmem.c @@ -15,6 +15,14 @@ #include "ghostxps.h" +void * +xps_realloc_imp(xps_context_t *ctx, void *ptr, int size, const char *func) +{ + if (!ptr) + return gs_alloc_bytes(ctx->memory, size, func); + return gs_resize_object(ctx->memory, ptr, size, func); +} + static inline int xps_tolower(int c) { diff --git a/xps/xpspage.c b/xps/xpspage.c index 0c214cd92..df8a48997 100644 --- a/xps/xpspage.c +++ b/xps/xpspage.c @@ -120,7 +120,8 @@ xps_parse_fixed_page(xps_context_t *ctx, xps_part_t *part) char base_uri[1024]; char *s; - /* dprintf1("xps: page %s\n", part->name); */ + if (xps_doc_trace) + dprintf1("doc: parsing page %s\n", part->name); strcpy(base_uri, part->name); s = strrchr(base_uri, '/'); diff --git a/xps/xpspath.c b/xps/xpspath.c index eaa606ae5..39e492bbe 100644 --- a/xps/xpspath.c +++ b/xps/xpspath.c @@ -121,6 +121,7 @@ xps_restore_bounds(xps_context_t *ctx, gs_rect *save) ctx->bounds.q.y = save->q.y; } +#if 0 static void xps_debug_bounds(xps_context_t *ctx) { @@ -153,6 +154,7 @@ xps_debug_bounds(xps_context_t *ctx) gs_grestore(ctx->pgs); } +#endif int xps_unclip(xps_context_t *ctx, gs_rect *saved_bounds) diff --git a/xps/xpspng.c b/xps/xpspng.c index 48cc8b8e7..0529f0360 100644 --- a/xps/xpspng.c +++ b/xps/xpspng.c @@ -18,6 +18,11 @@ #include "stream.h" #include "strimpl.h" #include "gsstate.h" + +/* silence a warning where #if SHARE_LIBPNG is used when it's undefined */ +#ifndef SHARE_LIBPNG +#define SHARE_LIBPNG 0 +#endif #include "png_.h" /* diff --git a/xps/xpsresource.c b/xps/xpsresource.c index 4dd3a665c..164862b7f 100644 --- a/xps/xpsresource.c +++ b/xps/xpsresource.c @@ -72,12 +72,13 @@ xps_parse_remote_resource_dictionary(xps_context_t *ctx, char *base_uri, char *s { char part_name[1024]; char part_uri[1024]; + xps_resource_t *dict; xps_part_t *part; char *s; /* External resource dictionaries MUST NOT reference other resource dictionaries */ 
xps_absolute_path(part_name, base_uri, source_att); - part = xps_find_part(ctx, part_name); + part = xps_read_part(ctx, part_name); if (!part) { gs_throw1(-1, "cannot find remote resource part '%s'", part_name); @@ -105,7 +106,16 @@ xps_parse_remote_resource_dictionary(xps_context_t *ctx, char *base_uri, char *s if (s) s[1] = 0; - return xps_parse_resource_dictionary(ctx, part_uri, part->xml); + dict = xps_parse_resource_dictionary(ctx, part_uri, part->xml); + if (!dict) + { + gs_rethrow1(-1, "cannot parse remote resource dictionary %s", part_uri); + return NULL; + } + + xps_release_part(ctx, part); + + return dict; } xps_resource_t * diff --git a/xps/xpstiff.c b/xps/xpstiff.c index 31ff67423..3ed9ed4c2 100644 --- a/xps/xpstiff.c +++ b/xps/xpstiff.c @@ -152,55 +152,6 @@ static const byte bitrev[256] = 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff }; -static void -xps_debug_tiff(gs_memory_t *mem, xps_tiff_t *tiff) -{ - int i, n; - - dputs("TIFF <<\n"); - dprintf1("\t/NewSubfileType %u\n", tiff->subfiletype); - dprintf1("\t/PhotometricInterpretation %u\n", tiff->photometric); - dprintf1("\t/Compression %u\n", tiff->compression); - dprintf1("\t/ImageWidth %u\n", tiff->imagewidth); - dprintf1("\t/ImageLength %u\n", tiff->imagelength); - dprintf1("\t/BitsPerSample %u\n", tiff->bitspersample); - dprintf1("\t/SamplesPerPixel %u\n", tiff->samplesperpixel); - dprintf1("\t/PlanarConfiguration %u\n", tiff->planar); - dprintf1("\t/ExtraSamples %u\n", tiff->extrasamples); - dprintf1("\t/ColorMap $%p\n", tiff->colormap); - dprintf1("\t/XResolution %u\n", tiff->xresolution); - dprintf1("\t/YResolution %u\n", tiff->yresolution); - dprintf1("\t/ResolutionUnit %u\n", tiff->resolutionunit); - dprintf1("\t/FillOrder %u\n", tiff->fillorder); - dprintf1("\t/T4Options %u\n", tiff->g3opts); - dprintf1("\t/T6Options %u\n", tiff->g4opts); - dprintf1("\t/Predictor %u\n", tiff->predictor); - dprintf1("\t/JPEGTables %u\n", tiff->jpegtableslen); - dprintf2("\t/YCbCrSubSampling %u %u\n", 
tiff->ycbcrsubsamp[0], tiff->ycbcrsubsamp[1]); - - n = (tiff->imagelength + tiff->rowsperstrip - 1) / tiff->rowsperstrip; - - dprintf1("\t/RowsPerStrip %u\n", tiff->rowsperstrip); - - if (tiff->stripoffsets) - { - dputs("\t/StripOffsets "); - for (i = 0; i < n; i++) - dprintf1("%u ", tiff->stripoffsets[i]); - dputs("\n"); - } - - if (tiff->stripbytecounts) - { - dputs("\t/StripByteCounts "); - for (i = 0; i < n; i++) - dprintf1("%u ", tiff->stripbytecounts[i]); - dputs("\n"); - } - - dputs(">>\n"); -} - static int xps_report_error(stream_state * st, const char *str) { diff --git a/xps/xpstop.c b/xps/xpstop.c index 55cee16ac..5479abb99 100644 --- a/xps/xpstop.c +++ b/xps/xpstop.c @@ -23,6 +23,9 @@ #include "gxht.h" /* gsht1.h is incomplete, we need storage size of gs_halftone */ #include "gsht1.h" +int xps_zip_trace = 0; +int xps_doc_trace = 0; + static int xps_install_halftone(xps_context_t *ctx, gx_device *pdevice); #define XPS_PARSER_MIN_INPUT_SIZE 8192 @@ -44,6 +47,9 @@ struct xps_interp_instance_s void *post_page_closure; /* closure to call post_page_action with */ xps_context_t *ctx; + + FILE *tempfile; + char tempname[gp_file_name_sizeof]; }; /* version and build date are not currently used */ @@ -111,6 +117,7 @@ xps_imp_allocate_interp_instance(pl_interp_instance_t **ppinstance, ctx->pgs = pgs; ctx->fontdir = NULL; ctx->file = NULL; + ctx->zip_table = NULL; /* TODO: load some builtin ICC profiles here */ ctx->gray = gs_cspace_new_DeviceGray(ctx->memory); /* profile for gray images */ @@ -129,6 +136,25 @@ xps_imp_allocate_interp_instance(pl_interp_instance_t **ppinstance, *ppinstance = (pl_interp_instance_t *)instance; + strcpy(instance->tempname, ""); + instance->tempfile = NULL; + + if (getenv("XPS_ZIP_SEEK")) + { + instance->tempfile = gp_open_scratch_file(gp_scratch_file_name_prefix, + instance->tempname, "wb+"); + if (!instance->tempfile) + gs_warn("cannot open temporary buffer file; switching to streaming mode"); + } + + if (xps_zip_trace) + { + if 
(instance->tempfile) + dprintf1("zip: seek mode on temp file '%s'\n", instance->tempname); + else + dputs("zip: feed mode on data stream\n"); + } + return 0; } @@ -216,13 +242,41 @@ xps_imp_get_device_memory(pl_interp_instance_t *pinstance, gs_memory_t **ppmem) return 0; } +/* Parse an entire random access file */ +static int +xps_imp_process_file(pl_interp_instance_t *pinstance, FILE *file) +{ + xps_interp_instance_t *instance = (xps_interp_instance_t *)pinstance; + xps_context_t *ctx = instance->ctx; + int code; + + code = xps_process_file(ctx, file); + if (code) + gs_catch(code, "cannot process xps file"); + + return code; +} + /* Parse a cursor-full of data */ static int xps_imp_process(pl_interp_instance_t *pinstance, stream_cursor_read *pcursor) { xps_interp_instance_t *instance = (xps_interp_instance_t *)pinstance; xps_context_t *ctx = instance->ctx; - return xps_process_data(ctx, pcursor); + int code; + + if (instance->tempfile) + { + fwrite(pcursor->ptr + 1, pcursor->limit - pcursor->ptr, 1, instance->tempfile); + pcursor->ptr = pcursor->limit; + return 0; + } + + code = xps_process_data(ctx, pcursor); + if (code < 0) + gs_catch(code, "cannot process xps data"); + + return code; } /* Skip to end of job. 
@@ -240,6 +294,23 @@ xps_imp_flush_to_eoj(pl_interp_instance_t *pinstance, stream_cursor_read *pcurso static int xps_imp_process_eof(pl_interp_instance_t *pinstance) { + xps_interp_instance_t *instance = (xps_interp_instance_t *)pinstance; + xps_context_t *ctx = instance->ctx; + int code; + + if (instance->tempfile) + { + xps_imp_process_file(pinstance, instance->tempfile); + fclose(instance->tempfile); + unlink(instance->tempname); + } + else + { + code = xps_process_end_of_data(ctx); + if (code) + gs_catch(code, "cannot process xps file"); + } + return 0; } @@ -261,9 +332,14 @@ xps_imp_init_job(pl_interp_instance_t *pinstance) xps_interp_instance_t *instance = (xps_interp_instance_t *)pinstance; xps_context_t *ctx = instance->ctx; + if (gs_debug_c('|')) + xps_zip_trace = 1; + if (gs_debug_c('|')) + xps_doc_trace = 1; + ctx->part_table = xps_hash_new(ctx); - ctx->first_part = NULL; - ctx->last_part = NULL; + ctx->part_list = NULL; + ctx->current_part = NULL; ctx->start_part = NULL; @@ -286,44 +362,14 @@ xps_imp_dnit_job(pl_interp_instance_t *pinstance) xps_interp_instance_t *instance = (xps_interp_instance_t *)pinstance; xps_context_t *ctx = instance->ctx; - while (ctx->next_page) - { - xps_part_t *pagepart; - pagepart = xps_find_part(ctx, ctx->next_page->name); - if (!pagepart) - dputs(" page part missing\n"); - else if (!pagepart->complete) - dputs(" page part incomplete\n"); - else - { - xps_relation_t *rel; - for (rel = pagepart->relations; rel; rel = rel->next) - { - xps_part_t *subpart = xps_find_part(ctx, rel->target); - if (!subpart) - dprintf1(" resource '%s' missing\n", rel->target); - else if (!subpart->complete) - dprintf1(" resource '%s' incomplete\n", rel->target); - // TODO: recursive resource check... 
- } - } - - ctx->next_page = ctx->next_page->next; - } - - if (getenv("XPS_DEBUG_PARTS")) + if (gs_debug_c('|')) xps_debug_parts(ctx); - if (getenv("XPS_DEBUG_TYPES")) - { - xps_debug_type_map(ctx, "Default", ctx->defaults); - xps_debug_type_map(ctx, "Override", ctx->overrides); - } - if (getenv("XPS_DEBUG_PAGES")) + if (gs_debug_c('|')) xps_debug_fixdocseq(ctx); /* Free XPS parsing stuff */ { - xps_part_t *part = ctx->first_part; + xps_part_t *part = ctx->part_list; while (part) { xps_part_t *next = part->next; @@ -331,27 +377,13 @@ xps_imp_dnit_job(pl_interp_instance_t *pinstance) part = next; } - ctx->first_part = NULL; - ctx->last_part = NULL; - xps_hash_free(ctx, ctx->part_table); ctx->part_table = NULL; + ctx->part_list = NULL; + ctx->current_part = NULL; xps_free_fixed_pages(ctx); xps_free_fixed_documents(ctx); - - if (ctx->overrides) - xps_free_type_map(ctx, ctx->overrides); - ctx->overrides = NULL; - if (ctx->defaults) - xps_free_type_map(ctx, ctx->defaults); - ctx->defaults = NULL; - } - - if (ctx->file) - { - fclose(ctx->file); - ctx->file = NULL; } return 0; @@ -476,7 +508,7 @@ identity_transfer(floatp tint, const gx_transfer_map *ignore_map) /* The following is a 45 degree spot screen with the spots enumerated * in a defined order. */ -static const byte order16x16[256] = { +static byte order16x16[256] = { 38, 11, 14, 32, 165, 105, 90, 171, 38, 12, 14, 33, 161, 101, 88, 167, 30, 6, 0, 16, 61, 225, 231, 125, 30, 6, 1, 17, 63, 222, 227, 122, 27, 3, 8, 19, 71, 242, 205, 110, 28, 4, 9, 20, 74, 246, 208, 106, diff --git a/xps/xpsttf.c b/xps/xpsttf.c index de4caf5cd..51f4469cd 100644 --- a/xps/xpsttf.c +++ b/xps/xpsttf.c @@ -21,12 +21,12 @@ * Some extra TTF parsing magic that isn't covered by the graphics library. 
*/ -static inline int u16(byte *p) +static inline int u16(const byte *p) { return (p[0] << 8) | p[1]; } -static inline int u32(byte *p) +static inline int u32(const byte *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } @@ -127,8 +127,8 @@ xps_true_callback_glyph_name(gs_font *pfont, gs_glyph glyph, gs_const_string *ps { /* This funciton is copied verbatim from plfont.c */ - uint table_length; - ulong table_offset; + int table_length; + int table_offset; ulong format; uint numGlyphs; @@ -141,8 +141,8 @@ xps_true_callback_glyph_name(gs_font *pfont, gs_glyph glyph, gs_const_string *ps glyph -= 29; if ( glyph >= 0 && glyph < 258 ) { - pstr->data = pl_mac_names[glyph]; - pstr->size = strlen(pstr->data); + pstr->data = (byte*) pl_mac_names[glyph]; + pstr->size = strlen((char*)pstr->data); return 0; } else @@ -176,9 +176,9 @@ xps_true_callback_glyph_name(gs_font *pfont, gs_glyph glyph, gs_const_string *ps { /* Invent a name if we don't know the table format. */ char buf[16]; - sprintf(buf, "glyph%d", glyph); - pstr->data = buf; - pstr->size = strlen(pstr->data); + sprintf(buf, "glyph%d", (int)glyph); + pstr->data = (byte*)buf; + pstr->size = strlen((char*)pstr->data); return 0; } @@ -200,15 +200,15 @@ xps_true_callback_glyph_name(gs_font *pfont, gs_glyph glyph, gs_const_string *ps if ( glyph_name_index < 258 ) { // dprintf2("glyph name (mac) %d = %s\n", glyph, pl_mac_names[glyph_name_index]); - pstr->data = pl_mac_names[glyph_name_index]; - pstr->size = strlen(pstr->data); + pstr->data = (byte*) pl_mac_names[glyph_name_index]; + pstr->size = strlen((char*)pstr->data); return 0; } /* not mac */ else { - char *mydata; + byte *mydata; /* and here's the tricky part */ const byte *pascal_stringp = postp + 34 + (numGlyphs * 2); @@ -251,10 +251,9 @@ xps_true_callback_glyph_name(gs_font *pfont, gs_glyph glyph, gs_const_string *ps } static int -xps_true_callback_build_char(gs_text_enum_t *ptextenum, gs_state *pgs, gs_font *pfont, 
+xps_true_callback_build_char(gs_show_enum *penum, gs_state *pgs, gs_font *pfont, gs_char chr, gs_glyph glyph) { - gs_show_enum *penum = (gs_show_enum*)ptextenum; gs_font_type42 *p42 = (gs_font_type42*)pfont; const gs_rect *pbbox; float sbw[4], w2[6]; @@ -291,7 +290,7 @@ xps_true_callback_build_char(gs_text_enum_t *ptextenum, gs_state *pgs, gs_font * code = gs_type42_append(glyph, pgs, gx_current_path(pgs), - ptextenum, (gs_font*)p42, + (gs_text_enum_t*)penum, (gs_font*)p42, gs_show_in_charpath(penum) != cpm_show); if (code < 0) return code; @@ -361,12 +360,12 @@ int xps_init_truetype_font(xps_context_t *ctx, xps_font_t *font) p42->procs.build_char = xps_true_callback_build_char; memset(p42->font_name.chars, 0, sizeof(p42->font_name.chars)); - xps_load_sfnt_name(font, p42->font_name.chars); - p42->font_name.size = strlen(p42->font_name.chars); + xps_load_sfnt_name(font, (char*)p42->font_name.chars); + p42->font_name.size = strlen((char*)p42->font_name.chars); memset(p42->key_name.chars, 0, sizeof(p42->key_name.chars)); - strcpy(p42->key_name.chars, p42->font_name.chars); - p42->key_name.size = strlen(p42->key_name.chars); + strcpy((char*)p42->key_name.chars, (char*)p42->font_name.chars); + p42->key_name.size = strlen((char*)p42->key_name.chars); /* Base font specific: */ diff --git a/xps/xpsxml.c b/xps/xpsxml.c index 84f29741a..956066018 100644 --- a/xps/xpsxml.c +++ b/xps/xpsxml.c @@ -29,7 +29,7 @@ struct xps_parser_s xps_context_t *ctx; xps_item_t *root; xps_item_t *head; - const char *error; + char *error; int compat; char *base; /* base of relative URIs */ }; @@ -51,7 +51,7 @@ static char *skip_namespace(char *s) return s; } -static void on_open_tag(void *zp, const char *ns_name, const char **atts) +static void on_open_tag(void *zp, char *ns_name, char **atts) { xps_parser_t *parser = zp; xps_context_t *ctx = parser->ctx; @@ -155,7 +155,7 @@ static void on_open_tag(void *zp, const char *ns_name, const char **atts) parser->head = item; } -static void 
on_close_tag(void *zp, const char *name) +static void on_close_tag(void *zp, char *name) { xps_parser_t *parser = zp; @@ -171,7 +171,7 @@ static inline int is_xml_space(int c) return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } -static void on_text(void *zp, const char *buf, int len) +static void on_text(void *zp, char *buf, int len) { xps_parser_t *parser = zp; xps_context_t *ctx = parser->ctx; @@ -214,7 +214,7 @@ xps_process_compatibility(xps_context_t *ctx, xps_item_t *root) } xps_item_t * -xps_parse_xml(xps_context_t *ctx, char *buf, int len) +xps_parse_xml(xps_context_t *ctx, byte *buf, int len) { xps_parser_t parser; XML_Parser xp; @@ -235,11 +235,11 @@ xps_parse_xml(xps_context_t *ctx, char *buf, int len) XML_SetUserData(xp, &parser); XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); - XML_SetStartElementHandler(xp, on_open_tag); - XML_SetEndElementHandler(xp, on_close_tag); - XML_SetCharacterDataHandler(xp, on_text); + XML_SetStartElementHandler(xp, (XML_StartElementHandler)on_open_tag); + XML_SetEndElementHandler(xp, (XML_EndElementHandler)on_close_tag); + XML_SetCharacterDataHandler(xp, (XML_CharacterDataHandler)on_text); - code = XML_Parse(xp, buf, len, 1); + code = XML_Parse(xp, (char*)buf, len, 1); if (code == 0) { if (parser.root) @@ -276,7 +276,7 @@ xps_tag(xps_item_t *item) } char * -xps_att(xps_item_t *item, const char *att) +xps_att(xps_item_t *item, char *att) { int i; for (i = 0; item->atts[i]; i += 2) diff --git a/xps/xpszip.c b/xps/xpszip.c index d021f7ba2..f7380999a 100644 --- a/xps/xpszip.c +++ b/xps/xpszip.c @@ -11,16 +11,10 @@ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information. 
*/ -/* XPS interpreter - zip container parsing */ +/* XPS interpreter - zip container parsing in feed mode */ #include "ghostxps.h" -#define ZIP_LOCAL_FILE_SIG 0x04034b50 -#define ZIP_DATA_DESC_SIG 0x08074b50 -#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50 - -int xps_zip_trace = 0; - static inline unsigned int scan4(byte *buf) { @@ -91,119 +85,214 @@ xps_zip_free(xps_context_t *ctx, void *ptr) xps_free(ctx, ptr); } -xps_part_t * -xps_new_part(xps_context_t *ctx, char *name, int capacity) +/* + * Check that we have all the resources used by a page + * already loaded before we parse it. + */ + +static int +xps_validate_resources(xps_context_t *ctx, xps_part_t *part) { - xps_part_t *part; + xps_relation_t *rel; + xps_part_t *subpart; - part = xps_alloc(ctx, sizeof(xps_part_t)); - if (!part) - return NULL; - - part->name = NULL; - part->size = 0; - part->interleave = 0; - part->capacity = 0; - part->complete = 0; - part->data = NULL; - part->relations = NULL; - part->relations_complete = 0; - - part->font = NULL; - part->image = NULL; - part->icc = NULL; - part->xml = NULL; - - part->deobfuscated = 0; - - part->name = xps_strdup(ctx, name); - if (!part->name) + for (rel = part->relations; rel; rel = rel->next) { - xps_free(ctx, part); - return NULL; + if (!strcmp(rel->type, REL_REQUIRED_RESOURCE_RECURSIVE)) + { + subpart = xps_find_part(ctx, rel->target); + if (!subpart || !subpart->complete) + return 0; + if (!subpart->relations_complete) + { + xps_parse_content_relations(ctx, subpart); + subpart->relations_complete = 1; + } + if (!xps_validate_resources(ctx, subpart)) + return 0; + } + + if (!strcmp(rel->type, REL_REQUIRED_RESOURCE)) + { + subpart = xps_find_part(ctx, rel->target); + if (!subpart || !subpart->complete) + return 0; + } } - if (capacity == 0) - capacity = 1024; + return 1; +} + +/* + * Periodically free old parts and resources that + * will not be used any more. 
This looks at discard control + * information, and assumes that a given fixed page will + * not be drawn more than once. + */ - part->size = 0; - part->capacity = capacity; - part->data = xps_alloc(ctx, part->capacity); - if (!part->data) +static void +xps_free_used_parts(xps_context_t *ctx) +{ + /* Free parsed resources that were used on the last page */ + xps_part_t *part = ctx->part_list; + while (part) { - xps_free(ctx, part->name); - xps_free(ctx, part); - return NULL; + xps_part_t *next = part->next; + xps_free_part_caches(ctx, part); + part = next; } - part->next = NULL; + /* TODO: Free the data for page parts we have rendered */ + /* TODO: Free the data for parts we don't recognize */ + /* TODO: Parse DiscardControl parts to free stuff */ +} + +/* + * Process the latest part. Parse document structure and metadata + * parts into relation and fixdoc structs. Buffer and save other + * parts for later use. Parse and run the next page when all of its + * resources are available. + */ - /* add it to the list of parts */ - part->next = ctx->first_part; - ctx->first_part = part; +static int +xps_process_part(xps_context_t *ctx, xps_part_t *part) +{ + xps_document_t *fixdoc; - /* add it to the hash table of parts */ - xps_hash_insert(ctx, ctx->part_table, part->name, part); + if (xps_doc_trace && part->complete) + dprintf2("doc: found part %s %s\n", part->name, part->complete ? "" : "(piece)"); - return part; -} + /* + * This is a magic Open Packaging Convention name. 
+ */ -void -xps_free_part_caches(xps_context_t *ctx, xps_part_t *part) -{ -#if 0 - /* Can't free fonts because pdfwrite needs them alive */ - if (part->font) + if (strstr(part->name, "_rels/")) { - xps_free_font(ctx, part->font); - part->font = NULL; + xps_parse_metadata(ctx, part); + + if (part->complete) + { + char realname[1024]; + xps_part_t *realpart; + xps_part_name_from_relation_part_name(realname, part->name); + realpart = xps_find_part(ctx, realname); + if (realpart) + { + realpart->relations_complete = 1; + } + } } - if (part->icc) + /* TODO: DiscardControl parts. + * They are not used by files in the wild, so we don't bother. + */ + + /* + * For the rest we need to track the relationships + * and content-types given by the previous two types. + * + * We can't do anything until we have the relationship + * for the start part. + */ + + if (!ctx->start_part) { - xps_free_colorspace(ctx, part->icc); - part->icc = NULL; + xps_part_t *rootpart; + rootpart = xps_find_part(ctx, "/"); + if (rootpart) + { + xps_relation_t *rel; + for (rel = rootpart->relations; rel; rel = rel->next) + { + if (!strcmp(rel->type, REL_START_PART)) + { + xps_part_t *startpart; + + ctx->start_part = rel->target; + + if (xps_doc_trace) + dprintf1("doc: adding fixdocseq %s\n", ctx->start_part); + + startpart = xps_find_part(ctx, rel->target); + if (startpart) + xps_parse_metadata(ctx, startpart); + } + } + } } -#endif - if (part->image) + /* + * Read the start part (which is a FixedDocumentSequence) if it + * is the current part. + */ + + if (ctx->start_part) { - xps_free_image(ctx, part->image); - part->image = NULL; + if (!strcmp(part->name, ctx->start_part)) + { + xps_parse_metadata(ctx, part); + } } - if (part->xml) + /* + * Follow the FixedDocumentSequence and parse the + * listed FixedDocuments that we have available. 
+ */ + + for (fixdoc = ctx->first_fixdoc; fixdoc; fixdoc = fixdoc->next) { - xps_free_item(ctx, part->xml); - part->xml = NULL; + xps_part_t *fixdocpart = xps_find_part(ctx, fixdoc->name); + if (fixdocpart) + { + xps_parse_metadata(ctx, fixdocpart); + if (!fixdocpart->complete) + break; /* incomplete fixdocpart, try parsing more later */ + } } -} -void -xps_free_part(xps_context_t *ctx, xps_part_t *part) -{ - xps_free_part_caches(ctx, part); + /* + * If we know which page part is next, check if we + * have all the page dependencies. If everything is + * ready: parse and render. + */ - /* Nu-uh, can't free fonts because pdfwrite needs them alive */ - if (part->font) - return; + while (ctx->next_page) + { + xps_part_t *pagepart = xps_find_part(ctx, ctx->next_page->name); + if (pagepart && pagepart->complete) + { + if (!pagepart->relations_complete) + { + xps_parse_content_relations(ctx, pagepart); + pagepart->relations_complete = 1; + } - if (part->name) xps_free(ctx, part->name); - if (part->data) xps_free(ctx, part->data); + if (xps_validate_resources(ctx, pagepart)) + { + int code = xps_parse_fixed_page(ctx, pagepart); + if (code < 0) + return code; - part->name = NULL; - part->data = NULL; + ctx->next_page = ctx->next_page->next; - xps_free_relations(ctx, part->relations); - xps_free(ctx, part); -} + xps_free_used_parts(ctx); -xps_part_t * -xps_find_part(xps_context_t *ctx, char *name) -{ - return xps_hash_lookup(ctx->part_table, name); + continue; + } + } + + break; + } + + return 0; } +/* + * Prepare the part corresponding to the current + * ZIP entry being decompressed. + * Create new parts and aggregate interleaved data. 
+ */ + static int xps_prepare_part(xps_context_t *ctx) { @@ -240,7 +329,7 @@ xps_prepare_part(xps_context_t *ctx) if (!part) return gs_rethrow(-1, "cannot create part buffer"); - ctx->last_part = part; /* make it the current part */ + ctx->current_part = part; /* make it the current part */ } else { @@ -254,10 +343,10 @@ xps_prepare_part(xps_context_t *ctx) return gs_throw(-1, "cannot extend part buffer"); } - ctx->last_part = part; + ctx->current_part = part; } - ctx->last_part->complete = last_piece; + ctx->current_part->complete = last_piece; /* init decompression */ if (ctx->zip_method == 8) @@ -282,11 +371,18 @@ xps_prepare_part(xps_context_t *ctx) } } -/* return -1 = fail, 0 = need more data, 1 = finished */ +/* + * Call zlib to decompress the ZIP entry data. + * Return values: + * -1 = fail + * 0 = need more data + * 1 = finished + */ + static int -xps_read_part(xps_context_t *ctx, stream_cursor_read *buf) +xps_process_part_data(xps_context_t *ctx, stream_cursor_read *buf) { - xps_part_t *part = ctx->last_part; + xps_part_t *part = ctx->current_part; unsigned int crc32; unsigned int csize; unsigned int usize; @@ -304,7 +400,7 @@ xps_read_part(xps_context_t *ctx, stream_cursor_read *buf) return gs_throw(-1, "out of memory"); } - ctx->zip_stream.next_in = buf->ptr + 1; + ctx->zip_stream.next_in = (byte*) buf->ptr + 1; /* discard const */ ctx->zip_stream.avail_in = buf->limit - buf->ptr; ctx->zip_stream.next_out = part->data + part->size; ctx->zip_stream.avail_out = part->capacity - part->size; @@ -372,7 +468,8 @@ xps_read_part(xps_context_t *ctx, stream_cursor_read *buf) if (csize == usize && usize == part->size - part->interleave - sixteen) { - if (crc32 == xps_crc32(0, part->data + part->interleave, part->size - part->interleave - sixteen)) + if (crc32 == xps_crc32(0, part->data + part->interleave, + part->size - part->interleave - sixteen)) { part->size -= sixteen; return 1; @@ -410,6 +507,13 @@ xps_read_part(xps_context_t *ctx, stream_cursor_read *buf) } } 
+/* + * Parse a cursor full of data. Read and decompress the + * entries in the ZIP stream into the part table. + * Aggregate interleaved parts at this level. When a part + * is completed, call xps_process_part to process it. + */ + int xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) { @@ -418,9 +522,6 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) /* dprintf1("xps_process_data state=%d\n", ctx->zip_state); */ - if (getenv("XPS_ZIP_TRACE")) - xps_zip_trace = 1; - while (1) { switch (ctx->zip_state) @@ -439,13 +540,10 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) signature = read4(ctx, buf); if (signature == ZIP_LOCAL_FILE_SIG) { - if (xps_zip_trace) - dputs("zip: local file signature\n"); + /* do nothing */ } else if (signature == ZIP_DATA_DESC_SIG) { - if (xps_zip_trace) - dputs("zip: data desc signature\n"); if (ctx->zip_version >= 45) { (void) read4(ctx, buf); /* crc32 */ @@ -507,7 +605,7 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) ctx->zip_file_name[ctx->zip_name_length + 1] = 0; if (xps_zip_trace) - dprintf1("zip: entry %s\n", ctx->zip_file_name); + dprintf1("zip: inflating '%s'\n", ctx->zip_file_name + 1); } ctx->zip_state ++; @@ -534,7 +632,7 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) while (ctx->zip_state == 4) { - code = xps_read_part(ctx, buf); + code = xps_process_part_data(ctx, buf); if (code < 0) return gs_throw(-1, "cannot read part"); if (code == 0) @@ -550,7 +648,7 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) /* Process contents of part. * This is the entrance to the real parser. 
*/ - code = xps_process_part(ctx, ctx->last_part); + code = xps_process_part(ctx, ctx->current_part); if (code < 0) return gs_rethrow(code, "cannot handle part"); } @@ -559,3 +657,37 @@ xps_process_data(xps_context_t *ctx, stream_cursor_read *buf) return 0; } +int +xps_process_end_of_data(xps_context_t *ctx) +{ + if (xps_doc_trace) + dputs("doc: reached end of file; parsing remaining pages\n"); + + while (ctx->next_page) + { + xps_part_t *pagepart; + pagepart = xps_find_part(ctx, ctx->next_page->name); + if (!pagepart) + dputs(" page part missing\n"); + else if (!pagepart->complete) + dputs(" page part incomplete\n"); + else + { + xps_relation_t *rel; + for (rel = pagepart->relations; rel; rel = rel->next) + { + xps_part_t *subpart = xps_find_part(ctx, rel->target); + if (!subpart) + dprintf1(" resource '%s' missing\n", rel->target); + else if (!subpart->complete) + dprintf1(" resource '%s' incomplete\n", rel->target); + // TODO: recursive resource check... + } + } + + ctx->next_page = ctx->next_page->next; + } + + return gs_okay; +} + diff --git a/xps/xpszipseek.c b/xps/xpszipseek.c new file mode 100644 index 000000000..967724975 --- /dev/null +++ b/xps/xpszipseek.c @@ -0,0 +1,514 @@ +/* Copyright (C) 2006-2008 Artifex Software, Inc. + All Rights Reserved. + + This software is provided AS-IS with no warranty, either express or + implied. + + This software is distributed under license and may not be copied, modified + or distributed except as expressly authorized under the terms of that + license. Refer to licensing information at http://www.artifex.com/ + or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, + San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information. 
+*/ + +/* XPS interpreter - zip container parsing on a seekable file */ + +#include "ghostxps.h" + +static inline int getshort(FILE *file) +{ + int a = getc(file); + int b = getc(file); + return a | (b << 8); +} + +static inline int getlong(FILE *file) +{ + int a = getc(file); + int b = getc(file); + int c = getc(file); + int d = getc(file); + return a | (b << 8) | (c << 16) | (d << 24); +} + +static void * +xps_zip_alloc_items(xps_context_t *ctx, int items, int size) +{ + return xps_alloc(ctx, items * size); +} + +static void +xps_zip_free(xps_context_t *ctx, void *ptr) +{ + xps_free(ctx, ptr); +} + +static int +xps_compare_entries(const void *a0, const void *b0) +{ + xps_entry_t *a = (xps_entry_t*) a0; + xps_entry_t *b = (xps_entry_t*) b0; + return xps_strcasecmp(a->name, b->name); +} + +static xps_entry_t * +xps_find_zip_entry(xps_context_t *ctx, char *name) +{ + int l = 0; + int r = ctx->zip_count - 1; + while (l <= r) + { + int m = (l + r) >> 1; + int c = xps_strcasecmp(name, ctx->zip_table[m].name); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return &ctx->zip_table[m]; + } + return NULL; +} + +static int +xps_inflate_zip_entry(xps_context_t *ctx, xps_entry_t *ent, unsigned char *outbuf) +{ + z_stream stream; + unsigned char *inbuf; + int sig; + int version, general, method; + int namelength, extralength; + int code; + + if (xps_zip_trace) + dprintf1("zip: inflating entry '%s'\n", ent->name); + + fseek(ctx->file, ent->offset, 0); + + sig = getlong(ctx->file); + if (sig != ZIP_LOCAL_FILE_SIG) + return gs_throw1(-1, "wrong zip local file signature (0x%x)", sig); + + version = getshort(ctx->file); + general = getshort(ctx->file); + method = getshort(ctx->file); + (void) getshort(ctx->file); /* file time */ + (void) getshort(ctx->file); /* file date */ + (void) getlong(ctx->file); /* crc-32 */ + (void) getlong(ctx->file); /* csize */ + (void) getlong(ctx->file); /* usize */ + namelength = getshort(ctx->file); + extralength = 
getshort(ctx->file); + + fseek(ctx->file, namelength + extralength, 1); + + if (method == 0) + { + fread(outbuf, 1, ent->usize, ctx->file); + } + else if (method == 8) + { + inbuf = xps_alloc(ctx, ent->csize); + + fread(inbuf, 1, ent->csize, ctx->file); + + memset(&stream, 0, sizeof(z_stream)); + stream.zalloc = (alloc_func) xps_zip_alloc_items; + stream.zfree = (free_func) xps_zip_free; + stream.opaque = ctx; + stream.next_in = inbuf; + stream.avail_in = ent->csize; + stream.next_out = outbuf; + stream.avail_out = ent->usize; + + code = inflateInit2(&stream, -15); + if (code != Z_OK) + return gs_throw1(-1, "zlib inflateInit2 error: %s", stream.msg); + code = inflate(&stream, Z_FINISH); + if (code != Z_STREAM_END) + { + inflateEnd(&stream); + return gs_throw1(-1, "zlib inflate error: %s", stream.msg); + } + code = inflateEnd(&stream); + if (code != Z_OK) + return gs_throw1(-1, "zlib inflateEnd error: %s", stream.msg); + + xps_free(ctx, inbuf); + } + else + { + return gs_throw1(-1, "unknown compression method (%d)", method); + } + + return gs_okay; +} + +static xps_part_t * +xps_read_zip_entry(xps_context_t *ctx, char *name, xps_entry_t *ent) +{ + char buf[2048]; + xps_part_t *part; + int code; + + strcpy(buf, "/"); + strcat(buf, name); + + part = xps_find_part(ctx, buf); + if (!part) + { + part = xps_new_part(ctx, buf, ent->usize); + if (!part) + { + gs_rethrow1(-1, "cannot create part for zip entry '%s'", name); + return NULL; + } + } + + if (part->capacity < ent->usize) + { + part->data = xps_realloc(ctx, part->data, ent->usize); + if (!part->data) + { + gs_throw(-1, "cannot extend part buffer"); + return NULL; + } + } + + code = xps_inflate_zip_entry(ctx, ent, part->data); + if (code) + { + gs_rethrow1(code, "cannot inflate zip entry '%s'", name); + return NULL; + } + + part->size = ent->usize; + part->complete = 1; + + return part; +} + +static xps_part_t * +xps_read_zip_interleaved_entries(xps_context_t *ctx, char *name, int count, int size) +{ + char 
buf[2048]; + xps_part_t *part; + xps_entry_t *ent; + int code; + int i; + + strcpy(buf, "/"); + strcat(buf, name); + + if (xps_zip_trace) + dprintf3("zip: interleaved part '%s' pieces=%d size=%d\n", buf, count, size); + + part = xps_find_part(ctx, buf); + if (!part) + { + part = xps_new_part(ctx, buf, size); + if (!part) + { + gs_rethrow1(-1, "cannot create part for zip entry '%s'", buf); + return NULL; + } + } + + if (part->capacity < size) + { + part->data = xps_realloc(ctx, part->data, size); + if (!part->data) + { + gs_throw(-1, "cannot extend part buffer"); + return NULL; + } + } + + for (i = 0; i < count; i++) + { + if (i == count - 1) + sprintf(buf, "%s/[%d].last.piece", name, i); + else + sprintf(buf, "%s/[%d].piece", name, i); + + ent = xps_find_zip_entry(ctx, buf); + if (!ent) + { + gs_throw1(-1, "cannot find zip entry '%s'", buf); + return NULL; + } + + code = xps_inflate_zip_entry(ctx, ent, part->data + part->size); + if (code) + { + gs_rethrow1(code, "cannot inflate zip entry '%s'", buf); + return NULL; + } + + part->size += ent->usize; + } + + part->complete = 1; + + return part; +} + +/* + * Find and read the contents of a part. + * De-interleaves and reassembles if necessary. 
+ */ +xps_part_t * +xps_read_zip_part(xps_context_t *ctx, char *name) +{ + char buf[2048]; + xps_entry_t *ent; + int count, size; + + /* skip leading '/' */ + if (name[0] == '/') + name ++; + + ent = xps_find_zip_entry(ctx, name); + if (ent) + return xps_read_zip_entry(ctx, name, ent); + + count = 0; + size = 0; + do + { + sprintf(buf, "%s/[%d].piece", name, count); + ent = xps_find_zip_entry(ctx, buf); + if (!ent) + { + sprintf(buf, "%s/[%d].last.piece", name, count); + ent = xps_find_zip_entry(ctx, buf); + } + if (ent) + { + count ++; + size += ent->usize; + } + } while (ent); + if (count) + { + return xps_read_zip_interleaved_entries(ctx, name, count, size); + } + + return NULL; +} + +static int +xps_read_zip_dir(xps_context_t *ctx, int offset, int count) +{ + int sig; + int namesize, metasize, commentsize; + int i; + + fseek(ctx->file, offset, 0); + + for (i = 0; i < count; i++) + { + sig = getlong(ctx->file); + if (sig != ZIP_CENTRAL_DIRECTORY_SIG) + return gs_throw1(-1, "wrong zip central directory signature (0x%x)", sig); + + (void) getshort(ctx->file); /* version made by */ + (void) getshort(ctx->file); /* version to extract */ + (void) getshort(ctx->file); /* general */ + (void) getshort(ctx->file); /* method */ + (void) getshort(ctx->file); /* last mod file time */ + (void) getshort(ctx->file); /* last mod file date */ + (void) getlong(ctx->file); /* crc-32 */ + ctx->zip_table[i].csize = getlong(ctx->file); + ctx->zip_table[i].usize = getlong(ctx->file); + namesize = getshort(ctx->file); + metasize = getshort(ctx->file); + commentsize = getshort(ctx->file); + (void) getshort(ctx->file); /* disk number start */ + (void) getshort(ctx->file); /* int file atts */ + (void) getlong(ctx->file); /* ext file atts */ + ctx->zip_table[i].offset = getlong(ctx->file); + + ctx->zip_table[i].name = xps_alloc(ctx, namesize + 1); + if (!ctx->zip_table[i].name) + return gs_throw(-1, "cannot allocate zip entry name"); + + fread(ctx->zip_table[i].name, 1, namesize, 
ctx->file); + ctx->zip_table[i].name[namesize] = 0; + + fseek(ctx->file, metasize, 1); + fseek(ctx->file, commentsize, 1); + } + + qsort(ctx->zip_table, count, sizeof(xps_entry_t), xps_compare_entries); + + if (xps_zip_trace) + { + for (i = 0; i < ctx->zip_count; i++) + { + dprintf3("zip entry '%s' csize=%d usize=%d\n", + ctx->zip_table[i].name, + ctx->zip_table[i].csize, + ctx->zip_table[i].usize); + } + } + + return gs_okay; +} + +static int +xps_read_zip_end_of_dir(xps_context_t *ctx, int start_offset) +{ + int sig; + int count; + int offset; + + fseek(ctx->file, start_offset, 0); + + sig = getlong(ctx->file); + if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG) + return gs_throw1(-1, "wrong zip end of central directory signature (0x%x)", sig); + + (void) getshort(ctx->file); /* this disk */ + (void) getshort(ctx->file); /* start disk */ + (void) getshort(ctx->file); /* entries in this disk */ + count = getshort(ctx->file); /* entries in central directory disk */ + (void) getlong(ctx->file); /* size of central directory */ + offset = getlong(ctx->file); /* offset to central directory */ + + ctx->zip_count = count; + ctx->zip_table = xps_alloc(ctx, sizeof(xps_entry_t) * count); + if (!ctx->zip_table) + return gs_throw(-1, "cannot allocate zip entry table"); + + memset(ctx->zip_table, 0, sizeof(xps_entry_t) * count); + + return xps_read_zip_dir(ctx, offset, count); +} + +static int +xps_find_zip_end_of_dir(xps_context_t *ctx) +{ + int filesize, back, maxback; + int i, n; + char buf[512]; + + fseek(ctx->file, 0, SEEK_END); + filesize = ftell(ctx->file); + + maxback = MIN(filesize, 0xFFFF + sizeof buf); + back = MIN(maxback, sizeof buf); + + while (back < maxback) + { + fseek(ctx->file, filesize - back, 0); + + n = fread(buf, 1, sizeof buf, ctx->file); + if (n < 0) + return gs_throw(-1, "cannot read end of central directory"); + + for (i = n - 4; i > 0; i--) + if (!memcmp(buf + i, "PK\5\6", 4)) + return xps_read_zip_end_of_dir(ctx, filesize - back + i); + + back += sizeof 
buf - 4; + } + + return gs_throw(-1, "cannot find end of central directory"); +} + +static int +xps_read_and_process_metadata_part(xps_context_t *ctx, char *name) +{ + xps_part_t *part; + int code; + + part = xps_read_zip_part(ctx, name); + if (!part) + return gs_rethrow1(-1, "cannot read zip part '%s'", name); + + code = xps_parse_metadata(ctx, part); + if (code) + return gs_rethrow1(code, "cannot process metadata part '%s'", name); + + xps_release_part(ctx, part); + + return gs_okay; +} + +static int +xps_read_and_process_page_part(xps_context_t *ctx, char *name) +{ + xps_part_t *part; + int code; + + part = xps_read_zip_part(ctx, name); + if (!part) + return gs_rethrow1(-1, "cannot read zip part '%s'", name); + + code = xps_parse_fixed_page(ctx, part); + if (code) + return gs_rethrow1(code, "cannot parse fixed page part '%s'", name); + + xps_release_part(ctx, part); + + return gs_okay; +} + +int +xps_process_file(xps_context_t *ctx, FILE *file) +{ + xps_relation_t *rel; + xps_document_t *doc; + xps_page_t *page; + xps_part_t *part; + int code; + + ctx->file = file; + + code = xps_find_zip_end_of_dir(ctx); + if (code < 0) + return gs_rethrow(code, "cannot read zip central directory"); + + code = xps_read_and_process_metadata_part(ctx, "/_rels/.rels"); + if (code) + return gs_rethrow(code, "cannot process root relationship part"); + + part = xps_find_part(ctx, "/"); + if (!part) + return gs_rethrow(code, "cannot find root part"); + + for (rel = part->relations; rel; rel = rel->next) + { + if (!strcmp(rel->type, REL_START_PART)) + { + ctx->start_part = rel->target; + if (xps_doc_trace) + dprintf1("doc: adding fixdocseq %s\n", ctx->start_part); + } + } + + if (!ctx->start_part) + return gs_throw(-1, "cannot find fixed document sequence relationship"); + + code = xps_read_and_process_metadata_part(ctx, ctx->start_part); + if (code) + return gs_rethrow(code, "cannot process FixedDocumentSequence part"); + + for (doc = ctx->first_fixdoc; doc; doc = doc->next) + { + 
code = xps_read_and_process_metadata_part(ctx, doc->name); + if (code) + return gs_rethrow(code, "cannot process FixedDocument part"); + } + + for (page = ctx->first_page; page; page = page->next) + { + code = xps_read_and_process_page_part(ctx, page->name); + if (code) + return gs_rethrow(code, "cannot process FixedPage part"); + } + + return gs_okay; +} + |