diff options
author | Tim Kientzle <kientzle@acm.org> | 2016-01-03 18:11:46 -0800 |
---|---|---|
committer | Tim Kientzle <kientzle@acm.org> | 2016-01-03 18:11:46 -0800 |
commit | 4cd17347b4c35a0c06c2b13ee30e018bbcef6677 (patch) | |
tree | ec64c93872eca2189a376b97ae704406a4ee15a9 | |
parent | 3ea734488052804ff5fd47f6691073ca215e1110 (diff) | |
download | libarchive-utf8.tar.gz |
First implementation of UTF-8 path support for Zip reader. [branch: utf8]
This uses the existing "update_pathname_utf8" logic to
accept the UTF-8 pathname. The tests verify that this
supports UTF-8 paths stored using GP bit #11 or the 0x7075 extension.
More testing is certainly needed...
Note: A lot of the diff here is reshuffling of an internal API
so it can accept non-null-terminated strings.
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | libarchive/archive_entry.c | 45 | ||||
-rw-r--r-- | libarchive/archive_entry.h | 2 | ||||
-rw-r--r-- | libarchive/archive_read_support_format_zip.c | 55 | ||||
-rw-r--r-- | libarchive/archive_string.c | 8 | ||||
-rw-r--r-- | libarchive/archive_string.h | 2 | ||||
-rw-r--r-- | libarchive/test/test_read_format_zip_utf8_paths.c | 263 | ||||
-rw-r--r-- | libarchive/test/test_read_format_zip_utf8_paths.zip.uu | 62 |
8 files changed, 284 insertions, 154 deletions
diff --git a/Makefile.am b/Makefile.am index 0c04b54b..72dd6a01 100644 --- a/Makefile.am +++ b/Makefile.am @@ -790,7 +790,6 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_zip_sfx.uu \ libarchive/test/test_read_format_zip_symlink.zip.uu \ libarchive/test/test_read_format_zip_traditional_encryption_data.zip.uu \ - libarchive/test/test_read_format_zip_utf8_paths.zip.uu \ libarchive/test/test_read_format_zip_ux.zip.uu \ libarchive/test/test_read_format_zip_winzip_aes128.zip.uu \ libarchive/test/test_read_format_zip_winzip_aes256.zip.uu \ diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 4ac19660..cfff7725 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -852,8 +852,8 @@ archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name) int archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name) { - if (archive_mstring_update_utf8(entry->archive, - &entry->ae_gname, name) == 0) + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_gname, name, name == NULL ? 0 : strlen(name)) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -930,8 +930,8 @@ archive_entry_update_hardlink_utf8(struct archive_entry *entry, const char *targ entry->ae_set |= AE_SET_HARDLINK; else entry->ae_set &= ~AE_SET_HARDLINK; - if (archive_mstring_update_utf8(entry->archive, - &entry->ae_hardlink, target) == 0) + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_hardlink, target, target == NULL ? 0 : strlen(target)) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -1075,11 +1075,11 @@ archive_entry_update_link_utf8(struct archive_entry *entry, const char *target) { int r; if (entry->ae_set & AE_SET_SYMLINK) - r = archive_mstring_update_utf8(entry->archive, - &entry->ae_symlink, target); + r = archive_mstring_update_utf8_len(entry->archive, + &entry->ae_symlink, target, target == NULL ? 
0 : strlen(target)); else - r = archive_mstring_update_utf8(entry->archive, - &entry->ae_hardlink, target); + r = archive_mstring_update_utf8_len(entry->archive, + &entry->ae_hardlink, target, target == NULL ? 0 : strlen(target)); if (r == 0) return (1); if (errno == ENOMEM) @@ -1152,6 +1152,12 @@ archive_entry_copy_pathname(struct archive_entry *entry, const char *name) } void +archive_entry_copy_pathname_len(struct archive_entry *entry, const char *name, size_t length) +{ + archive_mstring_copy_mbs_len(&entry->ae_pathname, name, length); +} + +void archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name) { archive_mstring_copy_wcs(&entry->ae_pathname, name); @@ -1160,8 +1166,19 @@ archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name) int archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name) { - if (archive_mstring_update_utf8(entry->archive, - &entry->ae_pathname, name) == 0) + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_pathname, name, name == NULL ? 0 : strlen(name)) == 0) + return (1); + if (errno == ENOMEM) + __archive_errx(1, "No memory"); + return (0); +} + +int +archive_entry_update_pathname_utf8_len(struct archive_entry *entry, const char *name, size_t length) +{ + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_pathname, name, length) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -1282,8 +1299,8 @@ archive_entry_update_symlink_utf8(struct archive_entry *entry, const char *linkn entry->ae_set |= AE_SET_SYMLINK; else entry->ae_set &= ~AE_SET_SYMLINK; - if (archive_mstring_update_utf8(entry->archive, - &entry->ae_symlink, linkname) == 0) + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_symlink, linkname, linkname == NULL ? 
0 : strlen(linkname)) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); @@ -1339,8 +1356,8 @@ archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name) int archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name) { - if (archive_mstring_update_utf8(entry->archive, - &entry->ae_uname, name) == 0) + if (archive_mstring_update_utf8_len(entry->archive, + &entry->ae_uname, name, name == NULL ? 0 : strlen(name)) == 0) return (1); if (errno == ENOMEM) __archive_errx(1, "No memory"); diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 06740926..ecd02a79 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -305,8 +305,10 @@ __LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int); __LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_pathname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_pathname_len(struct archive_entry *, const char *, size_t); __LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *); +__LA_DECL int archive_entry_update_pathname_utf8_len(struct archive_entry *, const char *, size_t); __LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T); __LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index c0b47c86..a446b81c 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -410,7 +410,7 @@ zip_time(const char *p) * triplets. id and size are 2 bytes each. 
*/ static void -process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) +process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry, struct archive_entry *entry) { unsigned offset = 0; @@ -626,6 +626,11 @@ process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) } break; } + case 0x7075: + if (entry != NULL) { + archive_entry_update_pathname_utf8_len(entry, p + offset, datasize); + } + break; case 0x7855: /* Info-ZIP Unix Extra Field (type 2) "Ux". */ #ifdef DEBUG @@ -780,33 +785,27 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } if (zip_entry->zip_flags & ZIP_UTF8_NAME) { - /* The filename is stored to be UTF-8. */ - if (zip->sconv_utf8 == NULL) { - zip->sconv_utf8 = - archive_string_conversion_from_charset( - &a->archive, "UTF-8", 1); - if (zip->sconv_utf8 == NULL) - return (ARCHIVE_FATAL); - } - sconv = zip->sconv_utf8; - } else if (zip->sconv != NULL) - sconv = zip->sconv; - else - sconv = zip->sconv_default; + archive_entry_update_pathname_utf8_len(entry, h, filename_length); + } else { + if (zip->sconv != NULL) + sconv = zip->sconv; + else + sconv = zip->sconv_default; - if (archive_entry_copy_pathname_l(entry, - h, filename_length, sconv) != 0) { - if (errno == ENOMEM) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate memory for Pathname"); - return (ARCHIVE_FATAL); + if (archive_entry_copy_pathname_l(entry, + h, filename_length, sconv) != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Pathname"); + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Pathname cannot be converted " + "from %s to current locale.", + archive_string_conversion_charset_name(sconv)); + ret = ARCHIVE_WARN; } - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Pathname cannot be converted " - "from %s to current locale.", - 
archive_string_conversion_charset_name(sconv)); - ret = ARCHIVE_WARN; } __archive_read_consume(a, filename_length); @@ -850,7 +849,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } - process_extra(h, extra_length, zip_entry); + process_extra(h, extra_length, zip_entry, entry); __archive_read_consume(a, extra_length); if (zip_entry->flags & LA_FROM_CENTRAL_DIRECTORY) { @@ -2630,7 +2629,7 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) "Truncated ZIP file header"); return ARCHIVE_FATAL; } - process_extra(p + filename_length, extra_length, zip_entry); + process_extra(p + filename_length, extra_length, zip_entry, NULL); /* * Mac resource fork files are stored under the diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index 3d4be825..4db842fb 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -4152,8 +4152,8 @@ archive_mstring_copy_mbs_len_l(struct archive_mstring *aes, * usable values even if some of the character conversions are failing.) */ int -archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, - const char *utf8) +archive_mstring_update_utf8_len(struct archive *a, struct archive_mstring *aes, + const char *utf8, size_t length) { struct archive_string_conv *sc; int r; @@ -4164,7 +4164,7 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, } /* Save the UTF8 string. */ - archive_strcpy(&(aes->aes_utf8), utf8); + archive_strncpy(&(aes->aes_utf8), utf8, length); /* Empty the mbs and wcs strings. */ archive_string_empty(&(aes->aes_mbs)); @@ -4176,7 +4176,7 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes, sc = archive_string_conversion_from_charset(a, "UTF-8", 1); if (sc == NULL) return (-1);/* Couldn't allocate memory for sc. 
*/ - r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc); + r = archive_strncpy_l(&(aes->aes_mbs), utf8, length, sc); if (a == NULL) free_sconv_object(sc); if (r != 0) diff --git a/libarchive/archive_string.h b/libarchive/archive_string.h index 23f49165..6f7d2e5a 100644 --- a/libarchive/archive_string.h +++ b/libarchive/archive_string.h @@ -233,7 +233,7 @@ int archive_mstring_copy_wcs_len(struct archive_mstring *, const wchar_t *wcs, size_t); int archive_mstring_copy_mbs_len_l(struct archive_mstring *, const char *mbs, size_t, struct archive_string_conv *); -int archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8); +int archive_mstring_update_utf8_len(struct archive *, struct archive_mstring *aes, const char *utf8, size_t length); #endif diff --git a/libarchive/test/test_read_format_zip_utf8_paths.c b/libarchive/test/test_read_format_zip_utf8_paths.c index a7034162..ea4738b4 100644 --- a/libarchive/test/test_read_format_zip_utf8_paths.c +++ b/libarchive/test/test_read_format_zip_utf8_paths.c @@ -26,68 +26,243 @@ #include "test.h" __FBSDID("$FreeBSD$"); -static void -verify(struct archive *a) { +/* + * This collection of tests tries to verify that libarchive correctly + * handles Zip UTF-8 filenames stored in various fashions, including + * boundary cases where the different copies of the filename don't + * agree with each other. + * + * A UTF8 filename can appear in a Zip file in three different fashions. + * + * Unmarked: If bit 11 of the GP bit flag is not set, then the + * filename is stored in an unspecified encoding which may or may not + * be UTF-8. Practically speaking, decoders can make no assumptions + * about the filename encoding. + * + * GP bit flag #11: If this bit is set, then the Filename and File + * comment should be stored in UTF-8. + * + * Extra field 0x7075: This field was added by Info-ZIP. It stores a + * second copy of the filename in UTF-8. 
Note this second filename + * may not be the same encoding -- or even the same name -- as the primary + * filename. It makes no assertion about the character set used by + * the file comment. + * + * Also note that the above can appear in the local file header or the + * central directory or both and may or may not agree in any of those + * cases. In the worst case, we may have four different filenames for + * a single entry: The local file header can have both a regular filename + * (in UTF-8 or not) and the 0x7075 extension, the central directory + * would also have both, and all four names could be different. + */ + +/* + * Case 1: Use GP#11 to flag UTF-8 filename in local file header, + * but central directory has a different name. + */ +static const unsigned char case1[] = { + /* Local file header */ + 0x50, 0x4b, 0x03, 0x04, /* PK\003\004 */ + 0x20, 0x00, /* Version needed to extract: 2.0 */ + 0x00, 0x08, /* General purpose bit flag: 0x0800 == UTF8 filename */ + 0x00, 0x00, /* Compression method: None */ + 0x00, 0x00, /* Last mod time */ + 0x00, 0x00, /* Last mod date */ + 0x00, 0x00, 0x00, 0x00, /* CRC32 */ + 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */ + 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */ + 0x0a, 0x00, /* Filename length: 5 */ + 0x00, 0x00, /* Extra field lenght: 0 */ + 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Filename: ABC<right arrow>.txt */ + /* Extra field: Not present */ + + /* File data */ + 0x41, 0x42, 0x43, 0x0a, /* "ABC\n" */ + + /* Central directory header */ + 0x50, 0x4b, 0x01, 0x02, /* PK\001\002 */ + 0x20, 0x00, /* Version made by: 2.0 for MSDOS */ + 0x20, 0x00, /* Version needed to extract: 2.0 */ + 0x00, 0x08, /* General purpose bit flag: bit 11 = UTF8 filename */ + 0x00, 0x00, /* Compression method: None */ + 0x00, 0x00, /* Last mod time */ + 0x00, 0x00, /* Last mod date */ + 0x00, 0x00, 0x00, 0x00, /* CRC32 */ + 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */ + 0x04, 0x00, 0x00, 0x00, /* 
Uncompressed size: 4 */ + 0x05, 0x00, /* Filename length */ + 0x00, 0x00, /* Extra field length: 0 */ + 0x00, 0x00, /* Comment length: 0 */ + 0x00, 0x00, /* Disk number start: 0 */ + 0x00, 0x00, /* Internal file attributes */ + 0x00, 0x00, 0x00, 0x00, /* External file attributes */ + 0x00, 0x00, 0x00, 0x00, /* Offset of local header */ + 0x41, 0x2e, 0x74, 0x78, 0x74, /* File name */ + /* Extra field: not present */ + /* File comment: not present */ + + /* End of central directory record */ + 0x50, 0x4b, 0x05, 0x06, /* PK\005\006 */ + 0x00, 0x00, /* Number of this disk: 0 */ + 0x00, 0x00, /* Central directory starts on this disk: 0 */ + 0x01, 0x00, /* Total CD entries on this disk: 1 */ + 0x01, 0x00, /* Total CD entries: 1 */ + 0x33, 0x00, 0x00, 0x00, /* Size of CD in bytes */ + 0x2c, 0x00, 0x00, 0x00, /* Offset of start of CD */ + 0x00, 0x00, /* Length of archive comment: 0 */ + /* Archive comment: not present */ +}; + +DEFINE_TEST(test_read_format_zip_utf8_paths_case1_seeking) +{ + struct archive *a; struct archive_entry *ae; - const wchar_t *wp; - int file, i; - - /* - * Test file has a pattern to all names: They all have a - * number followed by " - " and an accented character. This - * archive was created by Windows and has regular filenames in - * some MBCS and uses the Zip 0x7075 extension to hold UTF-8 - * pathnames. The code below checks that the correct - * (Unicode) characters are decoded by comparing the number to - * the expected accented character. 
- */ - - for (file = 0; file < 20; ++file) { - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assert((wp = archive_entry_pathname_w(ae)) != NULL); - if (wp) { - for (i = 0; wp[i] != 0; ++i) { - if (wp[i] == '2') { - failure("Unicode 'o with umlaut' expected"); - assertEqualInt(wp[i + 4], 0xF6); - } else if (wp[i] == '3') { - failure("Unicode 'a with umlaut' expected"); - assertEqualInt(wp[i + 4], 0xE4); - } else if (wp[i] == '4') { - failure("Unicode 'a with ring' expected"); - assertEqualInt(wp[i + 4], 0xE5); - } - } - } - } - assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Verify with seeking reader. */ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, case1, sizeof(case1), 7)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString(archive_entry_pathname(ae), NULL); + assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt"); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); } -DEFINE_TEST(test_read_format_zip_utf8_paths) +DEFINE_TEST(test_read_format_zip_utf8_paths_case1_streaming) { - const char *refname = "test_read_format_zip_utf8_paths.zip"; struct archive *a; - char *p; - size_t s; + struct archive_entry *ae; - extract_reference_file(refname); + /* Verify with streaming reader. 
*/ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, case1, sizeof(case1), 31)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString(archive_entry_pathname(ae), NULL); + assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt"); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_free(a)); +} + +/* + * TODO: Case 2: GP#11 is used, but filename is not valid UTF-8. + * This should always cause an error; malformed UTF-8 should never happen. + */ + +/* + * TODO: Case 3: Store UTF-8 filename using extra field 0x7075 + * 0x7075 filename and regular filename have identical bytes but + * regular filename is not marked with GP#11 bit. + * + * Note: Central dir entry has only "A.txt" and no 0x7075 extension. + */ +static const unsigned char case3[] = { + /* Local file header */ + 0x50, 0x4b, 0x03, 0x04, /* PK\003\004 */ + 0x20, 0x00, /* Version needed to extract: 2.0 */ + 0x00, 0x00, /* General purpose bit flag: 0x0000 */ + 0x00, 0x00, /* Compression method: None */ + 0x00, 0x00, /* Last mod time */ + 0x00, 0x00, /* Last mod date */ + 0x00, 0x00, 0x00, 0x00, /* CRC32 */ + 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */ + 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */ + 0x0a, 0x00, /* Filename length: 10 */ + 0x0e, 0x00, /* Extra field length: 14 */ + 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Filename: ABC<right arrow>.txt */ + 0x75, 0x70, 0x0a, 0x00, 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Extra field: 0x7075 */ + + /* File data */ + 0x41, 0x42, 0x43, 0x0a, /* "ABC\n" */ + + /* Central directory header */ + 0x50, 0x4b, 0x01, 0x02, /* PK\001\002 */ + 0x20, 0x00, /* Version made by: 2.0 for MSDOS */ + 0x20, 0x00, /* Version needed to extract: 2.0 
*/ + 0x00, 0x08, /* General purpose bit flag: bit 11 = UTF8 filename */ + 0x00, 0x00, /* Compression method: None */ + 0x00, 0x00, /* Last mod time */ + 0x00, 0x00, /* Last mod date */ + 0x00, 0x00, 0x00, 0x00, /* CRC32 */ + 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */ + 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */ + 0x05, 0x00, /* Filename length */ + 0x00, 0x00, /* Extra field length: 0 */ + 0x00, 0x00, /* Comment length: 0 */ + 0x00, 0x00, /* Disk number start: 0 */ + 0x00, 0x00, /* Internal file attributes */ + 0x00, 0x00, 0x00, 0x00, /* External file attributes */ + 0x00, 0x00, 0x00, 0x00, /* Offset of local header */ + 0x41, 0x2e, 0x74, 0x78, 0x74, /* File name */ + /* No extra fields */ + /* File comment: not present */ + + /* End of central directory record */ + 0x50, 0x4b, 0x05, 0x06, /* PK\005\006 */ + 0x00, 0x00, /* Number of this disk: 0 */ + 0x00, 0x00, /* Central directory starts on this disk: 0 */ + 0x01, 0x00, /* Total CD entries on this disk: 1 */ + 0x01, 0x00, /* Total CD entries: 1 */ + 0x33, 0x00, 0x00, 0x00, /* Size of CD in bytes */ + 0x3a, 0x00, 0x00, 0x00, /* Offset of start of CD */ + 0x00, 0x00, /* Length of archive comment: 0 */ + /* Archive comment: not present */ +}; + +DEFINE_TEST(test_read_format_zip_utf8_paths_case3_seeking) +{ + struct archive *a; + struct archive_entry *ae; /* Verify with seeking reader. 
*/ assert((a = archive_read_new()) != NULL); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 10240)); - verify(a); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, case3, sizeof(case3), 7)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString(archive_entry_pathname(ae), NULL); + assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt"); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_utf8_paths_case3_streaming) +{ + struct archive *a; + struct archive_entry *ae; /* Verify with streaming reader. */ - p = slurpfile(&s, refname); assert((a = archive_read_new()) != NULL); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); - assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, p, s, 31)); - verify(a); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, case3, sizeof(case3), 31)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString(archive_entry_pathname(ae), NULL); + assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt"); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); assertEqualIntA(a, ARCHIVE_OK, archive_free(a)); } + + +/* + * TODO: Case 4: As with Case 3, but the two filenames are not + * the same. + */ + +/* + * TODO: Case 5: GP#11 and extra field 0x7075 both used, but + * store different names. + */ + +/* + * TODO: Similar cases where the local file header and central directory + * disagree. Seeking reader should always use the CD version, streaming + * reader must necessarily always use the local file header version. 
+ */ diff --git a/libarchive/test/test_read_format_zip_utf8_paths.zip.uu b/libarchive/test/test_read_format_zip_utf8_paths.zip.uu deleted file mode 100644 index 7e6cd742..00000000 --- a/libarchive/test/test_read_format_zip_utf8_paths.zip.uu +++ /dev/null @@ -1,62 +0,0 @@ -begin 644 test_read_format_zip_utf8_paths.zip -M4$L#!!0``````,(^9D5BZ95P"0````D````.````1FEL92`S("T@A"YT>'14 -M97-T(&9I;&502P,$%```````PCYF16+IE7`)````"0````X```!&:6QE(#0@ -M+2"&+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5<`D````)````$P`` -M`$9O;&1E<B`Q+T9I;&4@,2YT>'1497-T(&9I;&502P,$%```````PCYF16+I -ME7`)````"0```!<```!&;VQD97(@,2]&:6QE(#(@+2"4+G1X=%1E<W0@9FEL -M95!+`P04``````#"/F9%8NF5<`D````)````%P```$9O;&1E<B`Q+T9I;&4@ -M,R`M((0N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````7 -M````1F]L9&5R(#$O1FEL92`T("T@ABYT>'1497-T(&9I;&502P,$%``````` -MPCYF16+IE7`)````"0```!<```!&;VQD97(@,B`M()0O1FEL92`Q+G1X=%1E -M<W0@9FEL95!+`P04``````#"/F9%8NF5<`D````)````&P```$9O;&1E<B`R -M("T@E"]&:6QE(#(@+2"4+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5 -M<`D````)````&P```$9O;&1E<B`R("T@E"]&:6QE(#,@+2"$+G1X=%1E<W0@ -M9FEL95!+`P04``````#"/F9%8NF5<`D````)````&P```$9O;&1E<B`R("T@ -ME"]&:6QE(#0@+2"&+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5<`D` -M```)````%P```$9O;&1E<B`S("T@A"]&:6QE(#$N='AT5&5S="!F:6QE4$L# -M!!0``````,(^9D5BZ95P"0````D````;````1F]L9&5R(#,@+2"$+T9I;&4@ -M,B`M()0N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````; -M````1F]L9&5R(#,@+2"$+T9I;&4@,R`M((0N='AT5&5S="!F:6QE4$L#!!0` -M`````,(^9D5BZ95P"0````D````;````1F]L9&5R(#,@+2"$+T9I;&4@-"`M -M((8N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````7```` -M1F]L9&5R(#0@+2"&+T9I;&4@,2YT>'1497-T(&9I;&502P,$%```````PCYF -M16+IE7`)````"0```!L```!&;VQD97(@-"`M((8O1FEL92`R("T@E"YT>'14 -M97-T(&9I;&502P,$%```````PCYF16+IE7`)````"0```!L```!&;VQD97(@ -M-"`M((8O1FEL92`S("T@A"YT>'1497-T(&9I;&502P,$%```````PCYF16+I -ME7`)````"0```!L```!&;VQD97(@-"`M((8O1FEL92`T("T@ABYT>'1497-T -M(&9I;&502P,$%```````PCYF16+IE7`)````"0````H```!&:6QE(#$N='AT 
-M5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````.````1FEL92`R -M("T@E"YT>'1497-T(&9I;&502P$"%``4``````#"/F9%8NF5<`D````)```` -M#@`````````!`"``````````1FEL92`S("T@A"YT>'102P$"%``4``````#" -M/F9%8NF5<`D````)````#@`````````!`"`````U````1FEL92`T("T@ABYT -M>'102P$"%``4``````#"/F9%8NF5<`D````)````$P`````````!`"````!J -M````1F]L9&5R(#$O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0`` -M``D````7``````````$`(````*0```!&;VQD97(@,2]&:6QE(#(@+2"4+G1X -M=%!+`0(4`!0``````,(^9D5BZ95P"0````D````7``````````$`(````.(` -M``!&;VQD97(@,2]&:6QE(#,@+2"$+G1X=%!+`0(4`!0``````,(^9D5BZ95P -M"0````D````7``````````$`(````"`!``!&;VQD97(@,2]&:6QE(#0@+2"& -M+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D````7``````````$`(``` -M`%X!``!&;VQD97(@,B`M()0O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5B -MZ95P"0````D````;``````````$`(````)P!``!&;VQD97(@,B`M()0O1FEL -M92`R("T@E"YT>'102P$"%``4``````#"/F9%8NF5<`D````)````&P`````` -M```!`"````#>`0``1F]L9&5R(#(@+2"4+T9I;&4@,R`M((0N='AT4$L!`A0` -M%```````PCYF16+IE7`)````"0```!L``````````0`@````(`(``$9O;&1E -M<B`R("T@E"]&:6QE(#0@+2"&+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0`` -M``D````7``````````$`(````&("``!&;VQD97(@,R`M((0O1FEL92`Q+G1X -M=%!+`0(4`!0``````,(^9D5BZ95P"0````D````;``````````$`(````*`" -M``!&;VQD97(@,R`M((0O1FEL92`R("T@E"YT>'102P$"%``4``````#"/F9% -M8NF5<`D````)````&P`````````!`"````#B`@``1F]L9&5R(#,@+2"$+T9I -M;&4@,R`M((0N='AT4$L!`A0`%```````PCYF16+IE7`)````"0```!L````` -M`````0`@````)`,``$9O;&1E<B`S("T@A"]&:6QE(#0@+2"&+G1X=%!+`0(4 -M`!0``````,(^9D5BZ95P"0````D````7``````````$`(````&8#``!&;VQD -M97(@-"`M((8O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D` -M```;``````````$`(````*0#``!&;VQD97(@-"`M((8O1FEL92`R("T@E"YT -M>'102P$"%``4``````#"/F9%8NF5<`D````)````&P`````````!`"````#F -M`P``1F]L9&5R(#0@+2"&+T9I;&4@,R`M((0N='AT4$L!`A0`%```````PCYF -M16+IE7`)````"0```!L``````````0`@````*`0``$9O;&1E<B`T("T@AB]& -M:6QE(#0@+2"&+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D````*```` -M``````$`(````&H$``!&:6QE(#$N='AT4$L!`A0`%```````PCYF16+IE7`) 
-M````"0````X``````````0`@````FP0``$9I;&4@,B`M()0N='AT4$L%!@`` -0```4`!0`7`4``-`$```````` -` -end |