From 6a0b889f57b5f3c6b72253c0919eab10c7496263 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Tue, 26 Jan 2021 16:50:04 +0100 Subject: Fix #70091: Phar does not mark UTF-8 filenames in ZIP archives The default encoding of filenames in a ZIP archive is IBM Code Page 437. Phar, however, only supports UTF-8 filenames. Therefore we have to mark filenames as being stored in UTF-8 by setting the general purpose bit 11 (the language encoding flag). The effect of not setting this bit for non ASCII filenames can be seen in popular tools like 7-Zip and UnZip, but not when extracting the archives via ext/phar (which is agnostic to the filename encoding), or via ext/zip (which guesses the encoding). Thus we add a somewhat brittle low-level test case. Closes GH-6630. --- NEWS | 1 + ext/phar/tests/bug70091.phpt | 60 ++++++++++++++++++++++++++++++++++++++++++++ ext/phar/zip.c | 6 +++++ 3 files changed, 67 insertions(+) create mode 100644 ext/phar/tests/bug70091.phpt diff --git a/NEWS b/NEWS index 00fb7f75c2..6df4eecb57 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ PHP NEWS - Phar: . Fixed bug #75850 (Unclear error message wrt. __halt_compiler() w/o semicolon) (cmb) + . Fixed bug #70091 (Phar does not mark UTF-8 filenames in ZIP archives). (cmb) - Zip: . Fixed bug #80648 (Fix for bug 79296 should be based on runtime version). diff --git a/ext/phar/tests/bug70091.phpt b/ext/phar/tests/bug70091.phpt new file mode 100644 index 0000000000..893b2eec4e --- /dev/null +++ b/ext/phar/tests/bug70091.phpt @@ -0,0 +1,60 @@ +--TEST-- +Bug #70091 (Phar does not mark UTF-8 filenames in ZIP archives) +--SKIPIF-- +<?php +if (!extension_loaded('phar')) die('skip phar extension not available'); +if (!extension_loaded('zlib')) die('skip zlib extension not available'); +?> +--FILE-- +<?php +$phar = new PharData(__DIR__ . '/bug70091.zip'); +$phar->addFromString('föö', ''); +$phar->addFromString('foo', ''); +unset($phar); + +$stream = fopen(__DIR__ . 
'/bug70091.zip', 'r'); + +$data = fread($stream, 8); +var_dump(unpack('H8sig/@6/nflags', $data)); + +fseek($stream, 53); +$data = fread($stream, 8); +var_dump(unpack('H8sig/@6/nflags', $data)); + +fseek($stream, 104); +$data = fread($stream, 10); +var_dump(unpack('H8sig/@8/nflags', $data)); + +fseek($stream, 173); +$data = fread($stream, 10); +var_dump(unpack('H8sig/@8/nflags', $data)); +?> +--EXPECT-- +array(2) { + ["sig"]=> + string(8) "504b0304" + ["flags"]=> + int(8) +} +array(2) { + ["sig"]=> + string(8) "504b0304" + ["flags"]=> + int(8) +} +array(2) { + ["sig"]=> + string(8) "504b0102" + ["flags"]=> + int(8) +} +array(2) { + ["sig"]=> + string(8) "504b0102" + ["flags"]=> + int(8) +} +--CLEAN-- +<?php +@unlink(__DIR__ . '/bug70091.zip'); +?> diff --git a/ext/phar/zip.c b/ext/phar/zip.c index c52e87647d..b6e50d572a 100644 --- a/ext/phar/zip.c +++ b/ext/phar/zip.c @@ -829,6 +829,7 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{ zend_off_t offset; int not_really_modified = 0; p = (struct _phar_zip_pass*) arg; + uint16_t general_purpose_flags; if (entry->is_mounted) { return ZEND_HASH_APPLY_KEEP; @@ -878,6 +879,11 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{ memcpy(central.datestamp, local.datestamp, sizeof(local.datestamp)); PHAR_SET_16(central.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0)); PHAR_SET_16(local.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0)); + // set language encoding flag (all filenames have to be UTF-8 anyway) + general_purpose_flags = PHAR_GET_16(central.flags); + PHAR_SET_16(central.flags, general_purpose_flags | (1 << 11)); + general_purpose_flags = PHAR_GET_16(local.flags); + PHAR_SET_16(local.flags, general_purpose_flags | (1 << 11)); PHAR_SET_32(central.offset, php_stream_tell(p->filefp)); /* do extra field for perms later */ -- cgit v1.2.1