diff options
Diffstat (limited to 'libgo/go/archive/tar')
-rw-r--r-- | libgo/go/archive/tar/common.go | 27 | ||||
-rw-r--r-- | libgo/go/archive/tar/reader.go | 531 | ||||
-rw-r--r-- | libgo/go/archive/tar/reader_test.go | 773 | ||||
-rw-r--r-- | libgo/go/archive/tar/strconv.go | 252 | ||||
-rw-r--r-- | libgo/go/archive/tar/strconv_test.go | 319 | ||||
-rw-r--r-- | libgo/go/archive/tar/tar_test.go | 236 | ||||
-rw-r--r-- | libgo/go/archive/tar/testdata/gnu-incremental.tar | bin | 0 -> 2560 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/testdata/pax-bad-hdr-file.tar | bin | 0 -> 2560 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/testdata/pax-bad-mtime-file.tar | bin | 0 -> 2560 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/testdata/pax-pos-size-file.tar | bin | 0 -> 2560 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/testdata/ustar.issue12594.tar | bin | 0 -> 3072 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/testdata/writer-big-long.tar | bin | 4096 -> 4096 bytes | |||
-rw-r--r-- | libgo/go/archive/tar/writer.go | 106 | ||||
-rw-r--r-- | libgo/go/archive/tar/writer_test.go | 483 |
14 files changed, 1500 insertions, 1227 deletions
diff --git a/libgo/go/archive/tar/common.go b/libgo/go/archive/tar/common.go index 2a1e4321826..d2ae66d554d 100644 --- a/libgo/go/archive/tar/common.go +++ b/libgo/go/archive/tar/common.go @@ -13,7 +13,6 @@ package tar import ( - "bytes" "errors" "fmt" "os" @@ -21,6 +20,10 @@ import ( "time" ) +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. + // Header type flags. const ( TypeReg = '0' // regular file @@ -271,28 +274,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { return h, nil } -func isASCII(s string) bool { - for _, c := range s { - if c >= 0x80 { - return false - } - } - return true -} - -func toASCII(s string) string { - if isASCII(s) { - return s - } - var buf bytes.Buffer - for _, c := range s { - if c < 0x80 { - buf.WriteByte(byte(c)) - } - } - return buf.String() -} - // isHeaderOnlyType checks if the given type flag is of the type that has no // data section even if a size is specified. func isHeaderOnlyType(flag byte) bool { diff --git a/libgo/go/archive/tar/reader.go b/libgo/go/archive/tar/reader.go index 096ef082bf8..9abe888218f 100644 --- a/libgo/go/archive/tar/reader.go +++ b/libgo/go/archive/tar/reader.go @@ -22,22 +22,20 @@ var ( ErrHeader = errors.New("archive/tar: invalid tar header") ) -const maxNanoSecondIntSize = 9 - // A Reader provides sequential access to the contents of a tar archive. // A tar archive consists of a sequence of files. // The Next method advances to the next file in the archive (including the first), // and then it can be treated as an io.Reader to access the file's data. 
type Reader struct { r io.Reader - err error pad int64 // amount of padding (ignored) after current file entry curr numBytesReader // reader for current file entry blk block // buffer to use as temporary local storage -} -type parser struct { - err error // Last error seen + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error } // A numBytesReader is an io.Reader with a numBytes method, returning the number @@ -108,8 +106,12 @@ func (tr *Reader) Next() (*Header, error) { if tr.err != nil { return nil, tr.err } + hdr, err := tr.next() + tr.err = err + return hdr, err +} - var hdr *Header +func (tr *Reader) next() (*Header, error) { var extHdrs map[string]string // Externally, Next iterates through the tar archive as if it is a series of @@ -119,29 +121,29 @@ func (tr *Reader) Next() (*Header, error) { // one or more "header files" until it finds a "normal file". loop: for { - tr.err = tr.skipUnread() - if tr.err != nil { - return nil, tr.err + if err := tr.skipUnread(); err != nil { + return nil, err } - - hdr = tr.readHeader() - if tr.err != nil { - return nil, tr.err + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err + } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err } // Check for PAX/GNU special headers and files. switch hdr.Typeflag { case TypeXHeader: - extHdrs, tr.err = parsePAX(tr) - if tr.err != nil { - return nil, tr.err + extHdrs, err = parsePAX(tr) + if err != nil { + return nil, err } continue loop // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: - var realname []byte - realname, tr.err = ioutil.ReadAll(tr) - if tr.err != nil { - return nil, tr.err + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err } // Convert GNU extensions to use PAX headers. 
@@ -156,31 +158,73 @@ loop: extHdrs[paxLinkpath] = p.parseString(realname) } if p.err != nil { - tr.err = p.err - return nil, tr.err + return nil, p.err } continue loop // This is a meta header affecting the next header default: - mergePAX(hdr, extHdrs) + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - tr.err = err + if err := mergePAX(hdr, extHdrs); err != nil { return nil, err } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil, tr.err - } + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil { + return nil, err } - break loop // This is a file, so stop + return hdr, nil // This is a file, so stop + } + } +} + +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. +func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader + } + + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + tr.curr = &regFileReader{r: tr.r, nb: nb} + return nil +} + +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. 
+func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { + var sp []sparseEntry + var err error + if hdr.Typeflag == TypeGNUSparse { + sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) + if err != nil { + return err + } + } else { + sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + return err } } - return hdr, nil + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) to be zero. + if sp != nil { + tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) + } + return err } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then @@ -219,13 +263,13 @@ func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]st hdr.Name = sparseName } if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 0) + realSize, err := strconv.ParseInt(sparseSize, 10, 64) if err != nil { return nil, ErrHeader } hdr.Size = realSize } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) + realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) if err != nil { return nil, ErrHeader } @@ -249,53 +293,32 @@ func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]st // in the header struct overwrite those found in the header // struct with higher precision or longer values. Esp. useful // for name and linkname fields. 
-func mergePAX(hdr *Header, headers map[string]string) error { +func mergePAX(hdr *Header, headers map[string]string) (err error) { + var id64 int64 for k, v := range headers { switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v - case paxGname: - hdr.Gname = v case paxUname: hdr.Uname = v + case paxGname: + hdr.Gname = v case paxUid: - uid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Uid = int(uid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible case paxGid: - gid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Gid = int(gid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible case paxAtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.AccessTime = t + hdr.AccessTime, err = parsePAXTime(v) case paxMtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ModTime = t + hdr.ModTime, err = parsePAXTime(v) case paxCtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ChangeTime = t + hdr.ChangeTime, err = parsePAXTime(v) case paxSize: - size, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Size = size + hdr.Size, err = strconv.ParseInt(v, 10, 64) default: if strings.HasPrefix(k, paxXattr) { if hdr.Xattrs == nil { @@ -304,44 +327,11 @@ func mergePAX(hdr *Header, headers map[string]string) error { hdr.Xattrs[k[len(paxXattr):]] = v } } - } - return nil -} - -// parsePAXTime takes a string of the form %d.%d as described in -// the PAX specification. 
-func parsePAXTime(t string) (time.Time, error) { - buf := []byte(t) - pos := bytes.IndexByte(buf, '.') - var seconds, nanoseconds int64 - var err error - if pos == -1 { - seconds, err = strconv.ParseInt(t, 10, 0) - if err != nil { - return time.Time{}, err - } - } else { - seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) - if err != nil { - return time.Time{}, err - } - nanoBuf := string(buf[pos+1:]) - // Pad as needed before converting to a decimal. - // For example .030 -> .030000000 -> 30000000 nanoseconds - if len(nanoBuf) < maxNanoSecondIntSize { - // Right pad - nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf)) - } else if len(nanoBuf) > maxNanoSecondIntSize { - // Right truncate - nanoBuf = nanoBuf[:maxNanoSecondIntSize] - } - nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0) if err != nil { - return time.Time{}, err + return ErrHeader } } - ts := time.Unix(seconds, nanoseconds) - return ts, nil + return nil } // parsePAX parses PAX headers. @@ -354,12 +344,11 @@ func parsePAX(r io.Reader) (map[string]string, error) { sbuf := string(buf) // For GNU PAX sparse format 0.0 support. - // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. - var sparseMap bytes.Buffer + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. + var sparseMap []string - headers := make(map[string]string) - // Each record is constructed as - // "%d %s=%s\n", length, keyword, value + extHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { @@ -367,127 +356,29 @@ func parsePAX(r io.Reader) (map[string]string, error) { } sbuf = residual - keyStr := key - if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { - // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. 
- sparseMap.WriteString(value) - sparseMap.Write([]byte{','}) - } else { - // Normal key. Set the value in the headers map. - headers[keyStr] = value - } - } - if sparseMap.Len() != 0 { - // Add sparse info to headers, chopping off the extra comma - sparseMap.Truncate(sparseMap.Len() - 1) - headers[paxGNUSparseMap] = sparseMap.String() - } - return headers, nil -} - -// parsePAXRecord parses the input PAX record string into a key-value pair. -// If parsing is successful, it will slice off the currently read record and -// return the remainder as r. -// -// A PAX record is of the following form: -// "%d %s=%s\n" % (size, key, value) -func parsePAXRecord(s string) (k, v, r string, err error) { - // The size field ends at the first space. - sp := strings.IndexByte(s, ' ') - if sp == -1 { - return "", "", s, ErrHeader - } - - // Parse the first token as a decimal integer. - n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int - if perr != nil || n < 5 || int64(len(s)) < n { - return "", "", s, ErrHeader - } - - // Extract everything between the space and the final newline. - rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] - if nl != "\n" { - return "", "", s, ErrHeader - } - - // The first equals separates the key from the value. - eq := strings.IndexByte(rec, '=') - if eq == -1 { - return "", "", s, ErrHeader - } - return rec[:eq], rec[eq+1:], rem, nil -} - -// parseString parses bytes as a NUL-terminated C-style string. -// If a NUL byte is not found then the whole slice is returned as a string. -func (*parser) parseString(b []byte) string { - n := 0 - for n < len(b) && b[n] != 0 { - n++ - } - return string(b[0:n]) -} - -// parseNumeric parses the input as being encoded in either base-256 or octal. -// This function may return negative numbers. -// If parsing fails or an integer overflow occurs, err will be set. -func (p *parser) parseNumeric(b []byte) int64 { - // Check for base-256 (binary) format first. 
- // If the first bit is set, then all following bits constitute a two's - // complement encoded number in big-endian byte order. - if len(b) > 0 && b[0]&0x80 != 0 { - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } - - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader } - if (x >> 56) > 0 { - p.err = ErrHeader // Integer overflow - return 0 + sparseMap = append(sparseMap, value) + default: + // According to PAX specification, a value is stored only if it is + // non-empty. Otherwise, the key is deleted. + if len(value) > 0 { + extHdrs[key] = value + } else { + delete(extHdrs, key) } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - p.err = ErrHeader // Integer overflow - return 0 } - if inv == 0xff { - return ^int64(x) - } - return int64(x) - } - - // Normal case is base-8 (octal) format. - return p.parseOctal(b) -} - -func (p *parser) parseOctal(b []byte) int64 { - // Because unused fields are filled with NULs, we need - // to skip leading NULs. Fields may also be padded with - // spaces or NULs. - // So we remove leading and trailing NULs and spaces to - // be sure. 
- b = bytes.Trim(b, " \x00") - - if len(b) == 0 { - return 0 } - x, perr := strconv.ParseUint(p.parseString(b), 8, 64) - if perr != nil { - p.err = ErrHeader + if len(sparseMap) > 0 { + extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } - return int64(x) + return extHdrs, nil } // skipUnread skips any unread bytes in the existing file entry, as well as any @@ -516,51 +407,46 @@ func (tr *Reader) skipUnread() error { // Seek seems supported, so perform the real Seek. pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) if err != nil { - tr.err = err - return tr.err + return err } seekSkipped = pos2 - pos1 } } - var copySkipped int64 // Number of bytes skipped via CopyN - copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { - tr.err = io.ErrUnexpectedEOF + copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < dataSkip { + err = io.ErrUnexpectedEOF } - return tr.err + return err } // readHeader reads the next block header and assumes that the underlying reader -// is already aligned to a block boundary. +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. // // The err will be set to io.EOF only when one of the following occurs: // * Exactly 0 bytes are read and EOF is hit. // * Exactly 1 block of zeros is read and EOF is hit. // * At least 2 blocks of zeros are read. -func (tr *Reader) readHeader() *Header { - if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { - return nil // io.EOF is okay here - } - +func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. 
+ if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read + } if bytes.Equal(tr.blk[:], zeroBlock[:]) { - if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { - return nil // io.EOF is okay here + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { - tr.err = io.EOF - } else { - tr.err = ErrHeader // zero block and then non-zero block + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read } - return nil + return nil, nil, ErrHeader // Zero block and then non-zero block } // Verify the header matches a known format. format := tr.blk.GetFormat() if format == formatUnknown { - tr.err = ErrHeader - return nil + return nil, nil, ErrHeader } var p parser @@ -577,6 +463,26 @@ func (tr *Reader) readHeader() *Header { hdr.Typeflag = v7.TypeFlag()[0] hdr.Linkname = p.parseString(v7.LinkName()) + // The atime and ctime fields are often left unused. Some versions of Go + // had a bug in the tar.Writer where it would output an invalid tar file + // in certain rare situations because the logic incorrectly believed that + // the old GNU format had a prefix field. This is wrong and leads to + // an outputted file that actually mangles the atime and ctime fields. + // + // In order to continue reading tar files created by a buggy writer, we + // try to parse the atime and ctime fields, but just return the zero value + // of time.Time when we cannot parse them. + // + // See https://golang.org/issues/12594 + tryParseTime := func(b []byte) time.Time { + var p parser + n := p.parseNumeric(b) + if b[0] != 0x00 && p.err == nil { + return time.Unix(n, 0) + } + return time.Time{} + } + // Unpack format specific fields. 
if format > formatV7 { ustar := tr.blk.USTAR() @@ -589,9 +495,7 @@ func (tr *Reader) readHeader() *Header { var prefix string switch format { - case formatUSTAR, formatGNU: - // TODO(dsnet): Do not use the prefix field for the GNU format! - // See golang.org/issues/12594 + case formatUSTAR: ustar := tr.blk.USTAR() prefix = p.parseString(ustar.Prefix()) case formatSTAR: @@ -599,97 +503,68 @@ func (tr *Reader) readHeader() *Header { prefix = p.parseString(star.Prefix()) hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + case formatGNU: + gnu := tr.blk.GNU() + hdr.AccessTime = tryParseTime(gnu.AccessTime()) + hdr.ChangeTime = tryParseTime(gnu.ChangeTime()) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } + return hdr, &tr.blk, p.err +} - nb := hdr.Size - if isHeaderOnlyType(hdr.Typeflag) { - nb = 0 - } - if nb < 0 { - tr.err = ErrHeader - return nil - } - - // Set the current file reader. - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - tr.curr = &regFileReader{r: tr.r, nb: nb} - - // Check for old GNU sparse format entry. - if hdr.Typeflag == TypeGNUSparse { - // Get the real size of the file. - hdr.Size = p.parseNumeric(tr.blk.GNU().RealSize()) - if p.err != nil { - tr.err = p.err - return nil - } - - // Read the sparse map. - sp := tr.readOldGNUSparseMap(&tr.blk) - if tr.err != nil { - return nil - } - - // Current file is a GNU sparse file. Update the current file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil - } +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. 
+// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != formatGNU { + return nil, ErrHeader } + var p parser + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) if p.err != nil { - tr.err = p.err - return nil + return nil, p.err } - - return hdr -} - -// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. -// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, -// then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry { - var p parser var s sparseArray = blk.GNU().Sparse() var sp = make([]sparseEntry, 0, s.MaxEntries()) - for i := 0; i < s.MaxEntries(); i++ { - offset := p.parseOctal(s.Entry(i).Offset()) - numBytes := p.parseOctal(s.Entry(i).NumBytes()) - if p.err != nil { - tr.err = p.err - return nil - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - - for s.IsExtended()[0] > 0 { - // There are more entries. Read an extension header and parse its entries. - var blk block - if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil { - return nil - } - s = blk.Sparse() - + for { for i := 0; i < s.MaxEntries(); i++ { - offset := p.parseOctal(s.Entry(i).Offset()) - numBytes := p.parseOctal(s.Entry(i).NumBytes()) - if p.err != nil { - tr.err = p.err - return nil + // This termination condition is identical to GNU and BSD tar. 
+ if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) } - if offset == 0 && numBytes == 0 { - break + offset := p.parseNumeric(s.Entry(i).Offset()) + numBytes := p.parseNumeric(s.Entry(i).NumBytes()) + if p.err != nil { + return nil, p.err } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := io.ReadFull(tr.r, blk[:]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + s = blk.Sparse() + continue + } + return sp, nil // Done } - return sp } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format @@ -817,7 +692,7 @@ func (tr *Reader) numBytes() int64 { // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what // the Header.Size claims. -func (tr *Reader) Read(b []byte) (n int, err error) { +func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } @@ -825,11 +700,11 @@ func (tr *Reader) Read(b []byte) (n int, err error) { return 0, io.EOF } - n, err = tr.curr.Read(b) + n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } - return + return n, err } func (rfr *regFileReader) Read(b []byte) (n int, err error) { diff --git a/libgo/go/archive/tar/reader_test.go b/libgo/go/archive/tar/reader_test.go index 7b148b5122b..338686836b6 100644 --- a/libgo/go/archive/tar/reader_test.go +++ b/libgo/go/archive/tar/reader_test.go @@ -18,17 +18,15 @@ import ( "time" ) -type untarTest struct { - file string // Test input file - headers []*Header // Expected output headers - chksums []string // MD5 checksum of files, leave as nil if not checked - err error // Expected error to occur -} - -var gnuTarTest = &untarTest{ - file: "testdata/gnu.tar", - headers: []*Header{ - { +func TestReader(t 
*testing.T) { + vectors := []struct { + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur + }{{ + file: "testdata/gnu.tar", + headers: []*Header{{ Name: "small.txt", Mode: 0640, Uid: 73025, @@ -38,8 +36,7 @@ var gnuTarTest = &untarTest{ Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - { + }, { Name: "small2.txt", Mode: 0640, Uid: 73025, @@ -49,18 +46,14 @@ var gnuTarTest = &untarTest{ Typeflag: '0', Uname: "dsymonds", Gname: "eng", + }}, + chksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - chksums: []string{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, -} - -var sparseTarTest = &untarTest{ - file: "testdata/sparse-formats.tar", - headers: []*Header{ - { + }, { + file: "testdata/sparse-formats.tar", + headers: []*Header{{ Name: "sparse-gnu", Mode: 420, Uid: 1000, @@ -73,8 +66,7 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + }, { Name: "sparse-posix-0.0", Mode: 420, Uid: 1000, @@ -87,8 +79,7 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + }, { Name: "sparse-posix-0.1", Mode: 420, Uid: 1000, @@ -101,8 +92,7 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + }, { Name: "sparse-posix-1.0", Mode: 420, Uid: 1000, @@ -115,8 +105,7 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + }, { Name: "end", Mode: 420, Uid: 1000, @@ -129,209 +118,237 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, + }}, + chksums: []string{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", }, - }, - chksums: []string{ - 
"6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "b0061974914468de549a2af8ced10316", - }, -} - -var untarTests = []*untarTest{ - gnuTarTest, - sparseTarTest, - { + }, { file: "testdata/star.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1244592783, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - AccessTime: time.Unix(1244592783, 0), - ChangeTime: time.Unix(1244592783, 0), - }, - { - Name: "small2.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1244592783, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - AccessTime: time.Unix(1244592783, 0), - ChangeTime: time.Unix(1244592783, 0), - }, - }, - }, - { + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }}, + }, { file: "testdata/v7.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0444, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1244593104, 0), - Typeflag: '\x00', - }, - { - Name: "small2.txt", - Mode: 0444, - Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1244593104, 0), - Typeflag: '\x00', - }, - }, - }, - { + headers: []*Header{{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244593104, 0), + Typeflag: '\x00', + }, { + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244593104, 0), + 
Typeflag: '\x00', + }}, + }, { file: "testdata/pax.tar", - headers: []*Header{ - { - Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - Mode: 0664, - Uid: 1000, - Gid: 1000, - Uname: "shane", - Gname: "shane", - Size: 7, - ModTime: time.Unix(1350244992, 23960108), - ChangeTime: time.Unix(1350244992, 23960108), - AccessTime: time.Unix(1350244992, 23960108), - Typeflag: TypeReg, - }, - { - Name: "a/b", - Mode: 0777, - Uid: 1000, - Gid: 1000, - Uname: "shane", - Gname: "shane", - Size: 0, - ModTime: time.Unix(1350266320, 910238425), - ChangeTime: time.Unix(1350266320, 910238425), - AccessTime: time.Unix(1350266320, 910238425), - Typeflag: TypeSymlink, - Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - }, + headers: []*Header{{ + Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + }, { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }}, + }, { + 
file: "testdata/pax-bad-hdr-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-bad-mtime-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-pos-size-file.tar", + headers: []*Header{{ + Name: "foo", + Mode: 0640, + Uid: 319973, + Gid: 5000, + Size: 999, + ModTime: time.Unix(1442282516, 0), + Typeflag: '0', + Uname: "joetsai", + Gname: "eng", + }}, + chksums: []string{ + "0afb597b283fe61b5d4879669a350556", }, - }, - { + }, { file: "testdata/nil-uid.tar", // golang.org/issue/5290 - headers: []*Header{ - { - Name: "P1050238.JPG.log", - Mode: 0664, - Uid: 0, - Gid: 0, - Size: 14, - ModTime: time.Unix(1365454838, 0), - Typeflag: TypeReg, - Linkname: "", - Uname: "eyefi", - Gname: "eyefi", - Devmajor: 0, - Devminor: 0, - }, - }, - }, - { + headers: []*Header{{ + Name: "P1050238.JPG.log", + Mode: 0664, + Uid: 0, + Gid: 0, + Size: 14, + ModTime: time.Unix(1365454838, 0), + Typeflag: TypeReg, + Linkname: "", + Uname: "eyefi", + Gname: "eyefi", + Devmajor: 0, + Devminor: 0, + }}, + }, { file: "testdata/xattrs.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0644, - Uid: 1000, - Gid: 10, - Size: 5, - ModTime: time.Unix(1386065770, 448252320), - Typeflag: '0', - Uname: "alex", - Gname: "wheel", - AccessTime: time.Unix(1389782991, 419875220), - ChangeTime: time.Unix(1389782956, 794414986), - Xattrs: map[string]string{ - "user.key": "value", - "user.key2": "value2", - // Interestingly, selinux encodes the terminating null inside the xattr - "security.selinux": "unconfined_u:object_r:default_t:s0\x00", - }, + headers: []*Header{{ + Name: "small.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 5, + ModTime: time.Unix(1386065770, 448252320), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1389782956, 794414986), + Xattrs: map[string]string{ + "user.key": "value", + "user.key2": "value2", + // Interestingly, selinux encodes the terminating null inside the xattr + 
"security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, - { - Name: "small2.txt", - Mode: 0644, - Uid: 1000, - Gid: 10, - Size: 11, - ModTime: time.Unix(1386065770, 449252304), - Typeflag: '0', - Uname: "alex", - Gname: "wheel", - AccessTime: time.Unix(1389782991, 419875220), - ChangeTime: time.Unix(1386065770, 449252304), - Xattrs: map[string]string{ - "security.selinux": "unconfined_u:object_r:default_t:s0\x00", - }, + }, { + Name: "small2.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 11, + ModTime: time.Unix(1386065770, 449252304), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1386065770, 449252304), + Xattrs: map[string]string{ + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, - }, - }, - { + }}, + }, { // Matches the behavior of GNU, BSD, and STAR tar utilities. file: "testdata/gnu-multi-hdrs.tar", - headers: []*Header{ - { - Name: "GNU2/GNU2/long-path-name", - Linkname: "GNU4/GNU4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', - }, - }, - }, - { + headers: []*Header{{ + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }}, + }, { + // GNU tar file with atime and ctime fields set. + // Created with the GNU tar v1.27.1. 
+ // tar --incremental -S -cvf gnu-incremental.tar test2 + file: "testdata/gnu-incremental.tar", + headers: []*Header{{ + Name: "test2/", + Mode: 16877, + Uid: 1000, + Gid: 1000, + Size: 14, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'D', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + }, { + Name: "test2/foo", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 64, + ModTime: time.Unix(1441973363, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + }, { + Name: "test2/sparse", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 536870912, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'S', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441991948, 0), + ChangeTime: time.Unix(1441973436, 0), + }}, + }, { // Matches the behavior of GNU and BSD tar utilities. file: "testdata/pax-multi-hdrs.tar", - headers: []*Header{ - { - Name: "bar", - Linkname: "PAX4/PAX4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', - }, - }, - }, - { + headers: []*Header{{ + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }}, + }, { file: "testdata/neg-size.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue10968.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue11169.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue12435.tar", err: ErrHeader, - }, -} + }} -func TestReader(t *testing.T) { - for i, v := range untarTests { + for i, v := range vectors { f, err := os.Open(v.file) if err != nil { t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) @@ -440,83 +457,8 @@ func TestPartialRead(t *testing.T) { } } -func TestParsePAXHeader(t *testing.T) { - paxTests := [][3]string{ - {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths - {"a", "a=name", "9 a=name\n"}, // Test case involving 
multiple acceptable length - {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}} - for _, test := range paxTests { - key, expected, raw := test[0], test[1], test[2] - reader := bytes.NewReader([]byte(raw)) - headers, err := parsePAX(reader) - if err != nil { - t.Errorf("Couldn't parse correctly formatted headers: %v", err) - continue - } - if strings.EqualFold(headers[key], expected) { - t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected) - continue - } - trailer := make([]byte, 100) - n, err := reader.Read(trailer) - if err != io.EOF || n != 0 { - t.Error("Buffer wasn't consumed") - } - } - badHeaderTests := [][]byte{ - []byte("3 somelongkey=\n"), - []byte("50 tooshort=\n"), - } - for _, test := range badHeaderTests { - if _, err := parsePAX(bytes.NewReader(test)); err != ErrHeader { - t.Fatal("Unexpected success when parsing bad header") - } - } -} - -func TestParsePAXTime(t *testing.T) { - // Some valid PAX time values - timestamps := map[string]time.Time{ - "1350244992.023960108": time.Unix(1350244992, 23960108), // The common case - "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value - "1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value - "1350244992": time.Unix(1350244992, 0), // Low precision value - } - for input, expected := range timestamps { - ts, err := parsePAXTime(input) - if err != nil { - t.Fatal(err) - } - if !ts.Equal(expected) { - t.Fatalf("Time parsing failure %s %s", ts, expected) - } - } -} - -func TestMergePAX(t *testing.T) { - hdr := new(Header) - // Test a string, integer, and time based value. 
- headers := map[string]string{ - "path": "a/b/c", - "uid": "1000", - "mtime": "1350244992.023960108", - } - err := mergePAX(hdr, headers) - if err != nil { - t.Fatal(err) - } - want := &Header{ - Name: "a/b/c", - Uid: 1000, - ModTime: time.Unix(1350244992, 23960108), - } - if !reflect.DeepEqual(hdr, want) { - t.Errorf("incorrect merge: got %+v, want %+v", hdr, want) - } -} - func TestSparseFileReader(t *testing.T) { - var vectors = []struct { + vectors := []struct { realSize int64 // Real size of the output file sparseMap []sparseEntry // Input sparse map sparseData string // Input compact data @@ -639,9 +581,11 @@ func TestSparseFileReader(t *testing.T) { r := bytes.NewReader([]byte(v.sparseData)) rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))} - var sfr *sparseFileReader - var err error - var buf []byte + var ( + sfr *sparseFileReader + err error + buf []byte + ) sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) if err != nil { @@ -668,6 +612,64 @@ func TestSparseFileReader(t *testing.T) { } } +func TestReadOldGNUSparseMap(t *testing.T) { + const ( + t00 = "00000000000\x0000000000000\x00" + t11 = "00000000001\x0000000000001\x00" + t12 = "00000000001\x0000000000002\x00" + t21 = "00000000002\x0000000000001\x00" + ) + + mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block { + var blk block + copy(blk.GNU().RealSize(), size) + copy(blk.GNU().Sparse().Entry(0), sp0) + copy(blk.GNU().Sparse().Entry(1), sp1) + copy(blk.GNU().Sparse().Entry(2), sp2) + copy(blk.GNU().Sparse().Entry(3), sp3) + copy(blk.GNU().Sparse().IsExtended(), ext) + if format != formatUnknown { + blk.SetFormat(format) + } + return &blk + } + + vectors := []struct { + data string // Input data + rawHdr *block // Input raw header + want []sparseEntry // Expected sparse entries to be outputted + err error // Expected error to be returned + }{ + {"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader}, + {"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), 
nil, ErrHeader}, + {"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil}, + {"", mkBlk("1234", t00, t11, "", "", "", formatGNU), + []sparseEntry{{0, 0}, {1, 1}}, nil}, + {"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU), + []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil}, + {"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{}, io.ErrUnexpectedEOF}, + {t11 + t11, + mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{}, io.ErrUnexpectedEOF}, + {t11 + t21 + strings.Repeat("\x00", 512), + mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), + []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil}, + } + + for i, v := range vectors { + tr := Reader{r: strings.NewReader(v.data)} + hdr := new(Header) + got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } + } +} + func TestReadGNUSparseMap0x1(t *testing.T) { const ( maxUint = ^uint(0) @@ -679,7 +681,7 @@ func TestReadGNUSparseMap0x1(t *testing.T) { big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) ) - var vectors = []struct { + vectors := []struct { extHdrs map[string]string // Input data sparseMap []sparseEntry // Expected sparse entries to be outputted err error // Expected errors that may be raised @@ -745,12 +747,12 @@ func TestReadGNUSparseMap0x1(t *testing.T) { } func TestReadGNUSparseMap1x0(t *testing.T) { - var sp = []sparseEntry{{1, 2}, {3, 4}} + sp := []sparseEntry{{1, 2}, {3, 4}} for i := 0; i < 98; i++ { sp = append(sp, sparseEntry{54321, 12345}) } - var vectors = []struct { + vectors := []struct { input string // Input data sparseMap []sparseEntry // Expected sparse entries to be outputted cnt int // Expected number of bytes read @@ -825,8 +827,7 @@ func TestReadGNUSparseMap1x0(t *testing.T) { 
} func TestUninitializedRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) + f, err := os.Open("testdata/gnu.tar") if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -868,7 +869,7 @@ func TestReadTruncation(t *testing.T) { data2 += strings.Repeat("\x00", 10*512) trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes - var vectors = []struct { + vectors := []struct { input string // Input stream cnt int // Expected number of headers read err error // Expected error outcome @@ -904,8 +905,7 @@ func TestReadTruncation(t *testing.T) { {pax + trash[:1], 0, io.ErrUnexpectedEOF}, {pax + trash[:511], 0, io.ErrUnexpectedEOF}, {sparse[:511], 0, io.ErrUnexpectedEOF}, - // TODO(dsnet): This should pass, but currently fails. - // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:512], 0, io.ErrUnexpectedEOF}, {sparse[:3584], 1, io.EOF}, {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header {sparse[:9216], 1, io.EOF}, @@ -1002,7 +1002,7 @@ func TestReadHeaderOnly(t *testing.T) { t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) } for i := 0; i < 8; i++ { - var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1, hdr2 := hdrs[i+0], hdrs[i+8] hdr1.Size, hdr2.Size = 0, 0 if !reflect.DeepEqual(*hdr1, *hdr2) { t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) @@ -1010,116 +1010,87 @@ func TestReadHeaderOnly(t *testing.T) { } } -func TestParsePAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) - - var vectors = []struct { - input string - residual string - outputKey string - outputVal string - ok bool - }{ - {"6 k=v\n\n", "\n", "k", "v", true}, - {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, - {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, - {"110 path=" + medName + "\n", "", "path", medName, true}, - {"9 foo=ba\n", "", "foo", "ba", true}, - {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, - {"18 foo=b=\nar=\n==\x00\n", "", "foo", 
"b=\nar=\n==\x00", true}, - {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, - {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, - {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, - {"1 k=1\n", "1 k=1\n", "", "", false}, - {"6 k~1\n", "6 k~1\n", "", "", false}, - {"6_k=1\n", "6_k=1\n", "", "", false}, - {"6 k=1 ", "6 k=1 ", "", "", false}, - {"632 k=1\n", "632 k=1\n", "", "", false}, - {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, - {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, - {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, - } +func TestMergePAX(t *testing.T) { + vectors := []struct { + in map[string]string + want *Header + ok bool + }{{ + in: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + want: &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + }, + ok: true, + }, { + in: map[string]string{ + "gid": "gtgergergersagersgers", + }, + }, { + in: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + want: &Header{ + Xattrs: map[string]string{"key": "value"}, + }, + ok: true, + }} - for _, v := range vectors { - key, val, res, err := parsePAXRecord(v.input) - ok := (err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) - } - } - if ok && (key != v.outputKey || val != v.outputVal) { - t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", - v.input, key, val, v.outputKey, v.outputVal) + for i, v := range vectors { + got := new(Header) + err := mergePAX(got, v.in) + if v.ok && !reflect.DeepEqual(*got, *v.want) { + t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) } - if res != v.residual { - t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", - v.input, res, v.residual) 
+ if ok := err == nil; ok != v.ok { + t.Errorf("test %d, mergePAX(...): got %v, want %v", i, ok, v.ok) } } } -func TestParseNumeric(t *testing.T) { - var vectors = []struct { - input string - output int64 - ok bool +func TestParsePAX(t *testing.T) { + vectors := []struct { + in string + want map[string]string + ok bool }{ - // Test base-256 (binary) encoded values. - {"", 0, true}, - {"\x80", 0, true}, - {"\x80\x00", 0, true}, - {"\x80\x00\x00", 0, true}, - {"\xbf", (1 << 6) - 1, true}, - {"\xbf\xff", (1 << 14) - 1, true}, - {"\xbf\xff\xff", (1 << 22) - 1, true}, - {"\xff", -1, true}, - {"\xff\xff", -1, true}, - {"\xff\xff\xff", -1, true}, - {"\xc0", -1 * (1 << 6), true}, - {"\xc0\x00", -1 * (1 << 14), true}, - {"\xc0\x00\x00", -1 * (1 << 22), true}, - {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, - {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, - {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, - {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, - {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, - - // Test base-8 (octal) encoded values. 
- {"0000000\x00", 0, true}, - {" \x0000000\x00", 0, true}, - {" \x0000003\x00", 3, true}, - {"00000000227\x00", 0227, true}, - {"032033\x00 ", 032033, true}, - {"320330\x00 ", 0320330, true}, - {"0000660\x00 ", 0660, true}, - {"\x00 0000660\x00 ", 0660, true}, - {"0123456789abcdef", 0, false}, - {"0123456789\x00abcdef", 0, false}, - {"01234567\x0089abcdef", 342391, true}, - {"0123\x7e\x5f\x264123", 0, false}, + {"", nil, true}, + {"6 k=1\n", map[string]string{"k": "1"}, true}, + {"10 a=name\n", map[string]string{"a": "name"}, true}, + {"9 a=name\n", map[string]string{"a": "name"}, true}, + {"30 mtime=1350244992.023960108\n", map[string]string{"mtime": "1350244992.023960108"}, true}, + {"3 somelongkey=\n", nil, false}, + {"50 tooshort=\n", nil, false}, + {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", + map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, + {"13 key1=val1\n13 key2=val2\n8 key1=\n", + map[string]string{"key2": "val2"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + + "23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + + "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", + map[string]string{paxGNUSparseSize: "10", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "1,2,3,4"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.numbytes=2\n23 GNU.sparse.offset=1\n", + nil, false}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.offset=1,2\n25 GNU.sparse.numbytes=2\n", + nil, false}, } - for _, v := range vectors { - var p parser - num := p.parseNumeric([]byte(v.input)) - ok := (p.err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) - } + for i, v := range vectors { + r := strings.NewReader(v.in) + got, err := parsePAX(r) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + 
t.Errorf("test %d, parsePAX(...):\ngot %v\nwant %v", i, got, v.want) } - if ok && num != v.output { - t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, parsePAX(...): got %v, want %v", i, ok, v.ok) } } } diff --git a/libgo/go/archive/tar/strconv.go b/libgo/go/archive/tar/strconv.go new file mode 100644 index 00000000000..bb5b51c02de --- /dev/null +++ b/libgo/go/archive/tar/strconv.go @@ -0,0 +1,252 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 { + return false + } + } + return true +} + +func toASCII(s string) string { + if isASCII(s) { + return s + } + var buf bytes.Buffer + for _, c := range s { + if c < 0x80 { + buf.WriteByte(byte(c)) + } + } + return buf.String() +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + n := 0 + for n < len(b) && b[n] != 0 { + n++ + } + return string(b[0:n]) +} + +// Write s into b, terminating it with a NUL if there is room. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + return + } + ascii := toASCII(s) + copy(b, ascii) + if len(ascii) < len(b) { + b[len(ascii)] = 0 + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. 
+// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) +} + +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. + if len(b) > 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// Write x into b, as binary (GNUtar/star extension). 
+func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn := s, "" + if pos := strings.IndexByte(s, '.'); pos >= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. 
+ if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction + } + return time.Unix(secs, int64(nsecs)), nil +} + +// TODO(dsnet): Implement formatPAXTime. + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. 
+ if len(record) != size { + size = len(record) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) + } + return record +} diff --git a/libgo/go/archive/tar/strconv_test.go b/libgo/go/archive/tar/strconv_test.go new file mode 100644 index 00000000000..beb70938bfd --- /dev/null +++ b/libgo/go/archive/tar/strconv_test.go @@ -0,0 +1,319 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "math" + "strings" + "testing" + "time" +) + +func TestFitsInBase256(t *testing.T) { + vectors := []struct { + in int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.in) + if ok != v.ok { + t.Errorf("fitsInBase256(%d, %d): got %v, want %v", v.in, v.width, ok, v.ok) + } + } +} + +func TestParseNumeric(t *testing.T) { + vectors := []struct { + in string + want int64 + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. 
+ {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + got := p.parseNumeric([]byte(v.in)) + ok := (p.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.in) + } + } + if ok && got != v.want { + t.Errorf("parseNumeric(%q): got %d, want %d", v.in, got, v.want) + } + } +} + +func TestFormatNumeric(t *testing.T) { + vectors := []struct { + in int64 + want string + ok bool + }{ + // Test base-256 (binary) encoded values. + {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + 
{math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + got := make([]byte, len(v.want)) + f.formatNumeric(got, v.in) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.in) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.in) + } + } + if string(got) != v.want { + t.Errorf("formatNumeric(%d): got %q, want %q", v.in, got, v.want) + } + } +} + +func TestParsePAXTime(t *testing.T) { + vectors := []struct { + in string + want time.Time + ok bool + }{ + {"1350244992.023960108", time.Unix(1350244992, 23960108), true}, + {"1350244992.02396010", time.Unix(1350244992, 23960100), true}, + {"1350244992.0239601089", time.Unix(1350244992, 23960108), true}, + {"1350244992.3", time.Unix(1350244992, 300000000), true}, + {"1350244992", time.Unix(1350244992, 0), true}, + {"-1.000000001", time.Unix(-1, -1e0+0e0), true}, + {"-1.000001", time.Unix(-1, -1e3+0e0), true}, + {"-1.001000", time.Unix(-1, -1e6+0e0), true}, + {"-1", time.Unix(-1, -0e0+0e0), true}, + {"-1.999000", time.Unix(-1, -1e9+1e6), true}, + {"-1.999999", time.Unix(-1, -1e9+1e3), true}, + {"-1.999999999", time.Unix(-1, -1e9+1e0), true}, + {"0.000000001", time.Unix(0, 1e0+0e0), true}, + {"0.000001", time.Unix(0, 1e3+0e0), true}, + {"0.001000", time.Unix(0, 1e6+0e0), true}, + {"0", time.Unix(0, 0e0), true}, + {"0.999000", time.Unix(0, 1e9-1e6), true}, + {"0.999999", time.Unix(0, 1e9-1e3), true}, + {"0.999999999", time.Unix(0, 1e9-1e0), true}, + {"1.000000001", time.Unix(+1, +1e0-0e0), true}, + {"1.000001", time.Unix(+1, +1e3-0e0), true}, + {"1.001000", time.Unix(+1, +1e6-0e0), true}, + {"1", time.Unix(+1, +0e0-0e0), true}, + {"1.999000", time.Unix(+1, +1e9-1e6), true}, + {"1.999999", time.Unix(+1, +1e9-1e3), true}, + {"1.999999999", time.Unix(+1, +1e9-1e0), true}, + {"-1350244992.023960108", time.Unix(-1350244992, -23960108), true}, + 
{"-1350244992.02396010", time.Unix(-1350244992, -23960100), true}, + {"-1350244992.0239601089", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.3", time.Unix(-1350244992, -300000000), true}, + {"-1350244992", time.Unix(-1350244992, 0), true}, + {"", time.Time{}, false}, + {"0", time.Unix(0, 0), true}, + {"1.", time.Unix(1, 0), true}, + {"0.0", time.Unix(0, 0), true}, + {".5", time.Time{}, false}, + {"-1.3", time.Unix(-1, -3e8), true}, + {"-1.0", time.Unix(-1, -0e0), true}, + {"-0.0", time.Unix(-0, -0e0), true}, + {"-0.1", time.Unix(-0, -1e8), true}, + {"-0.01", time.Unix(-0, -1e7), true}, + {"-0.99", time.Unix(-0, -99e7), true}, + {"-0.98", time.Unix(-0, -98e7), true}, + {"-1.1", time.Unix(-1, -1e8), true}, + {"-1.01", time.Unix(-1, -1e7), true}, + {"-2.99", time.Unix(-2, -99e7), true}, + {"-5.98", time.Unix(-5, -98e7), true}, + {"-", time.Time{}, false}, + {"+", time.Time{}, false}, + {"-1.-1", time.Time{}, false}, + {"99999999999999999999999999999999999999999999999", time.Time{}, false}, + {"0.123456789abcdef", time.Time{}, false}, + {"foo", time.Time{}, false}, + {"\x00", time.Time{}, false}, + {"𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", time.Time{}, false}, // Unicode numbers (U+1D7EC to U+1D7F5) + {"98765﹒43210", time.Time{}, false}, // Unicode period (U+FE52) + } + + for _, v := range vectors { + ts, err := parsePAXTime(v.in) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXTime(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXTime(%q): got parsing success, want failure", v.in) + } + } + if ok && !ts.Equal(v.want) { + t.Errorf("parsePAXTime(%q): got (%ds %dns), want (%ds %dns)", + v.in, ts.Unix(), ts.Nanosecond(), v.want.Unix(), v.want.Nanosecond()) + } + } +} + +func TestParsePAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + in string + wantRes string + wantKey string + wantVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 
path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.in) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.in) + } + } + if v.ok && (key != v.wantKey || val != v.wantVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.in, key, val, v.wantKey, v.wantVal) + } + if res != v.wantRes { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.in, res, v.wantRes) + } + } +} + +func TestFormatPAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + inKey string + inVal string + want string + }{ + {"k", "v", "6 k=v\n"}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, + {"path", longName, "210 path=" + longName + "\n"}, + {"path", medName, "110 path=" + medName + "\n"}, + {"foo", "ba", "9 foo=ba\n"}, + {"foo", "bar", "11 foo=bar\n"}, + {"foo", 
"b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, + {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, + } + + for _, v := range vectors { + got := formatPAXRecord(v.inKey, v.inVal) + if got != v.want { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inKey, v.inVal, got, v.want) + } + } +} diff --git a/libgo/go/archive/tar/tar_test.go b/libgo/go/archive/tar/tar_test.go index d63c072eb9a..cf8337c2ad9 100644 --- a/libgo/go/archive/tar/tar_test.go +++ b/libgo/go/archive/tar/tar_test.go @@ -135,190 +135,178 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - golden := []headerRoundTripTest{ + vectors := []headerRoundTripTest{{ // regular file. - { - h: &Header{ - Name: "test.txt", - Mode: 0644 | c_ISREG, - Size: 12, - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeReg, - }, - fm: 0644, + h: &Header{ + Name: "test.txt", + Mode: 0644 | c_ISREG, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, }, + fm: 0644, + }, { // symbolic link. - { - h: &Header{ - Name: "link.txt", - Mode: 0777 | c_ISLNK, - Size: 0, - ModTime: time.Unix(1360600852, 0), - Typeflag: TypeSymlink, - }, - fm: 0777 | os.ModeSymlink, + h: &Header{ + Name: "link.txt", + Mode: 0777 | c_ISLNK, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, }, + fm: 0777 | os.ModeSymlink, + }, { // character device node. - { - h: &Header{ - Name: "dev/null", - Mode: 0666 | c_ISCHR, - Size: 0, - ModTime: time.Unix(1360578951, 0), - Typeflag: TypeChar, - }, - fm: 0666 | os.ModeDevice | os.ModeCharDevice, + h: &Header{ + Name: "dev/null", + Mode: 0666 | c_ISCHR, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, }, + fm: 0666 | os.ModeDevice | os.ModeCharDevice, + }, { // block device node. 
- { - h: &Header{ - Name: "dev/sda", - Mode: 0660 | c_ISBLK, - Size: 0, - ModTime: time.Unix(1360578954, 0), - Typeflag: TypeBlock, - }, - fm: 0660 | os.ModeDevice, + h: &Header{ + Name: "dev/sda", + Mode: 0660 | c_ISBLK, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, }, + fm: 0660 | os.ModeDevice, + }, { // directory. - { - h: &Header{ - Name: "dir/", - Mode: 0755 | c_ISDIR, - Size: 0, - ModTime: time.Unix(1360601116, 0), - Typeflag: TypeDir, - }, - fm: 0755 | os.ModeDir, + h: &Header{ + Name: "dir/", + Mode: 0755 | c_ISDIR, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, }, + fm: 0755 | os.ModeDir, + }, { // fifo node. - { - h: &Header{ - Name: "dev/initctl", - Mode: 0600 | c_ISFIFO, - Size: 0, - ModTime: time.Unix(1360578949, 0), - Typeflag: TypeFifo, - }, - fm: 0600 | os.ModeNamedPipe, + h: &Header{ + Name: "dev/initctl", + Mode: 0600 | c_ISFIFO, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, }, + fm: 0600 | os.ModeNamedPipe, + }, { // setuid. - { - h: &Header{ - Name: "bin/su", - Mode: 0755 | c_ISREG | c_ISUID, - Size: 23232, - ModTime: time.Unix(1355405093, 0), - Typeflag: TypeReg, - }, - fm: 0755 | os.ModeSetuid, + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISREG | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, }, + fm: 0755 | os.ModeSetuid, + }, { // setguid. - { - h: &Header{ - Name: "group.txt", - Mode: 0750 | c_ISREG | c_ISGID, - Size: 0, - ModTime: time.Unix(1360602346, 0), - Typeflag: TypeReg, - }, - fm: 0750 | os.ModeSetgid, + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISREG | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, }, + fm: 0750 | os.ModeSetgid, + }, { // sticky. 
- { - h: &Header{ - Name: "sticky.txt", - Mode: 0600 | c_ISREG | c_ISVTX, - Size: 7, - ModTime: time.Unix(1360602540, 0), - Typeflag: TypeReg, - }, - fm: 0600 | os.ModeSticky, + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISREG | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, }, + fm: 0600 | os.ModeSticky, + }, { // hard link. - { - h: &Header{ - Name: "hard.txt", - Mode: 0644 | c_ISREG, - Size: 0, - Linkname: "file.txt", - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeLink, - }, - fm: 0644, + h: &Header{ + Name: "hard.txt", + Mode: 0644 | c_ISREG, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, }, + fm: 0644, + }, { // More information. - { - h: &Header{ - Name: "info.txt", - Mode: 0600 | c_ISREG, - Size: 0, - Uid: 1000, - Gid: 1000, - ModTime: time.Unix(1360602540, 0), - Uname: "slartibartfast", - Gname: "users", - Typeflag: TypeReg, - }, - fm: 0600, + h: &Header{ + Name: "info.txt", + Mode: 0600 | c_ISREG, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, }, - } + fm: 0600, + }} - for i, g := range golden { - fi := g.h.FileInfo() + for i, v := range vectors { + fi := v.h.FileInfo() h2, err := FileInfoHeader(fi, "") if err != nil { t.Error(err) continue } if strings.Contains(fi.Name(), "/") { - t.Errorf("FileInfo of %q contains slash: %q", g.h.Name, fi.Name()) + t.Errorf("FileInfo of %q contains slash: %q", v.h.Name, fi.Name()) } - name := path.Base(g.h.Name) + name := path.Base(v.h.Name) if fi.IsDir() { name += "/" } if got, want := h2.Name, name; got != want { t.Errorf("i=%d: Name: got %v, want %v", i, got, want) } - if got, want := h2.Size, g.h.Size; got != want { + if got, want := h2.Size, v.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } - if got, want := h2.Uid, g.h.Uid; got != want { + if got, want := h2.Uid, v.h.Uid; got != want { t.Errorf("i=%d: 
Uid: got %d, want %d", i, got, want) } - if got, want := h2.Gid, g.h.Gid; got != want { + if got, want := h2.Gid, v.h.Gid; got != want { t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) } - if got, want := h2.Uname, g.h.Uname; got != want { + if got, want := h2.Uname, v.h.Uname; got != want { t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) } - if got, want := h2.Gname, g.h.Gname; got != want { + if got, want := h2.Gname, v.h.Gname; got != want { t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) } - if got, want := h2.Linkname, g.h.Linkname; got != want { + if got, want := h2.Linkname, v.h.Linkname; got != want { t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) } - if got, want := h2.Typeflag, g.h.Typeflag; got != want { - t.Logf("%#v %#v", g.h, fi.Sys()) + if got, want := h2.Typeflag, v.h.Typeflag; got != want { + t.Logf("%#v %#v", v.h, fi.Sys()) t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) } - if got, want := h2.Mode, g.h.Mode; got != want { + if got, want := h2.Mode, v.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } - if got, want := fi.Mode(), g.fm; got != want { + if got, want := fi.Mode(), v.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } - if got, want := h2.AccessTime, g.h.AccessTime; got != want { + if got, want := h2.AccessTime, v.h.AccessTime; got != want { t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) } - if got, want := h2.ChangeTime, g.h.ChangeTime; got != want { + if got, want := h2.ChangeTime, v.h.ChangeTime; got != want { t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) } - if got, want := h2.ModTime, g.h.ModTime; got != want { + if got, want := h2.ModTime, v.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } - if sysh, ok := fi.Sys().(*Header); !ok || sysh != g.h { + if sysh, ok := fi.Sys().(*Header); !ok || sysh != v.h { t.Errorf("i=%d: Sys didn't return original *Header", i) } } diff --git 
a/libgo/go/archive/tar/testdata/gnu-incremental.tar b/libgo/go/archive/tar/testdata/gnu-incremental.tar Binary files differnew file mode 100644 index 00000000000..4c442e5b82d --- /dev/null +++ b/libgo/go/archive/tar/testdata/gnu-incremental.tar diff --git a/libgo/go/archive/tar/testdata/pax-bad-hdr-file.tar b/libgo/go/archive/tar/testdata/pax-bad-hdr-file.tar Binary files differnew file mode 100644 index 00000000000..b97cc981f29 --- /dev/null +++ b/libgo/go/archive/tar/testdata/pax-bad-hdr-file.tar diff --git a/libgo/go/archive/tar/testdata/pax-bad-mtime-file.tar b/libgo/go/archive/tar/testdata/pax-bad-mtime-file.tar Binary files differnew file mode 100644 index 00000000000..9b22f7e8d94 --- /dev/null +++ b/libgo/go/archive/tar/testdata/pax-bad-mtime-file.tar diff --git a/libgo/go/archive/tar/testdata/pax-pos-size-file.tar b/libgo/go/archive/tar/testdata/pax-pos-size-file.tar Binary files differnew file mode 100644 index 00000000000..aed9a8aa48f --- /dev/null +++ b/libgo/go/archive/tar/testdata/pax-pos-size-file.tar diff --git a/libgo/go/archive/tar/testdata/ustar.issue12594.tar b/libgo/go/archive/tar/testdata/ustar.issue12594.tar Binary files differnew file mode 100644 index 00000000000..c7910ae9f43 --- /dev/null +++ b/libgo/go/archive/tar/testdata/ustar.issue12594.tar diff --git a/libgo/go/archive/tar/testdata/writer-big-long.tar b/libgo/go/archive/tar/testdata/writer-big-long.tar Binary files differindex 5960ee82478..52bd748f3b2 100644 --- a/libgo/go/archive/tar/testdata/writer-big-long.tar +++ b/libgo/go/archive/tar/testdata/writer-big-long.tar diff --git a/libgo/go/archive/tar/writer.go b/libgo/go/archive/tar/writer.go index 426e4434eb7..596fb8b9e17 100644 --- a/libgo/go/archive/tar/writer.go +++ b/libgo/go/archive/tar/writer.go @@ -42,10 +42,6 @@ type Writer struct { paxHdrBuff block // buffer to use in writeHeader when writing a PAX header } -type formatter struct { - err error // Last error seen -} - // NewWriter creates a new Writer writing to w. 
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -71,56 +67,6 @@ func (tw *Writer) Flush() error { return tw.err } -// Write s into b, terminating it with a NUL if there is room. -func (f *formatter) formatString(b []byte, s string) { - if len(s) > len(b) { - f.err = ErrFieldTooLong - return - } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 - } -} - -// Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { - s := strconv.FormatInt(x, 8) - // leading zeros, but leave room for a NUL. - for len(s)+1 < len(b) { - s = "0" + s - } - f.formatString(b, s) -} - -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. -// -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) -} - -// Write x into b, as binary (GNUtar/star extension). -func (f *formatter) formatNumeric(b []byte, x int64) { - if fitsInBase256(len(b), x) { - for i := len(b) - 1; i >= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format - return - } - - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong -} - var ( minTime = time.Unix(0, 0) // There is room for 11 octal digits (33 bits) of mtime. 
@@ -224,9 +170,41 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone) formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone) + // TODO(dsnet): The logic surrounding the prefix field is broken when trying + // to encode the header as GNU format. The challenge with the current logic + // is that we are unsure what format we are using at any given moment until + // we have processed *all* of the fields. The problem is that by the time + // all fields have been processed, some work has already been done to handle + // each field under the assumption that it is for one given format or + // another. In some situations, this causes the Writer to be confused and + // encode a prefix field when the format being used is GNU. Thus, producing + // an invalid tar file. + // + // As a short-term fix, we disable the logic to use the prefix field, which + // will force the badly generated GNU files to become encoded as being + // the PAX format. + // + // As an alternative fix, we could hard-code preferPax to be true. However, + // this is problematic for the following reasons: + // * The preferPax functionality is not tested at all. + // * This can result in headers that try to use both the GNU and PAX + // features at the same time, which is also wrong. + // + // The proper fix for this is to use a two-pass method: + // * The first pass simply determines what set of formats can possibly + // encode the given header. + // * The second pass actually encodes the header as that given format + // without worrying about violating the format. 
+ // + // See the following: + // https://golang.org/issue/12594 + // https://golang.org/issue/17630 + // https://golang.org/issue/9683 + const usePrefix = false + // try to use a ustar header when only the name is too long _, paxPathUsed := paxHeaders[paxPath] - if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { + if usePrefix && !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { prefix, suffix, ok := splitUSTARPath(hdr.Name) if ok { // Since we can encode in USTAR format, disable PAX header. @@ -317,7 +295,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro var buf bytes.Buffer // Keys are sorted before writing to body to allow deterministic output. - var keys []string + keys := make([]string, 0, len(paxHeaders)) for k := range paxHeaders { keys = append(keys, k) } @@ -340,22 +318,6 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length. -func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding - size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) - - // Final adjustment if adding size field increased the record size. - if len(record) != size { - size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) - } - return record -} - // Write writes to the current entry in the tar archive. // Write returns the error ErrWriteTooLong if more than // hdr.Size bytes are written after WriteHeader. 
diff --git a/libgo/go/archive/tar/writer_test.go b/libgo/go/archive/tar/writer_test.go index 27aa8e5dab6..d88b8f41ca8 100644 --- a/libgo/go/archive/tar/writer_test.go +++ b/libgo/go/archive/tar/writer_test.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "io/ioutil" - "math" "os" "reflect" "sort" @@ -19,176 +18,6 @@ import ( "time" ) -type writerTestEntry struct { - header *Header - contents string -} - -type writerTest struct { - file string // filename of expected output - entries []*writerTestEntry -} - -var writerTests = []*writerTest{ - // The writer test file was produced with this command: - // tar (GNU tar) 1.26 - // ln -s small.txt link.txt - // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt - { - file: "testdata/writer.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: "small.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1246508266, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - contents: "Kilts", - }, - { - header: &Header{ - Name: "small2.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1245217492, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - contents: "Google.com\n", - }, - { - header: &Header{ - Name: "link.txt", - Mode: 0777, - Uid: 1000, - Gid: 1000, - Size: 0, - ModTime: time.Unix(1314603082, 0), - Typeflag: '2', - Linkname: "small.txt", - Uname: "strings", - Gname: "strings", - }, - // no contents - }, - }, - }, - // The truncated test file was produced using these commands: - // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt - // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar - { - file: "testdata/writer-big.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: "tmp/16gig.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 16 << 30, - ModTime: time.Unix(1254699560, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - // fake contents - contents: 
strings.Repeat("\x00", 4<<10), - }, - }, - }, - // The truncated test file was produced using these commands: - // dd if=/dev/zero bs=1048576 count=16384 > (longname/)*15 /16gig.txt - // tar -b 1 -c -f- (longname/)*15 /16gig.txt | dd bs=512 count=8 > writer-big-long.tar - { - file: "testdata/writer-big-long.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: strings.Repeat("longname/", 15) + "16gig.txt", - Mode: 0644, - Uid: 1000, - Gid: 1000, - Size: 16 << 30, - ModTime: time.Unix(1399583047, 0), - Typeflag: '0', - Uname: "guillaume", - Gname: "guillaume", - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }, - }, - }, - // This file was produced using gnu tar 1.17 - // gnutar -b 4 --format=ustar (longname/)*15 + file.txt - { - file: "testdata/ustar.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: strings.Repeat("longname/", 15) + "file.txt", - Mode: 0644, - Uid: 0765, - Gid: 024, - Size: 06, - ModTime: time.Unix(1360135598, 0), - Typeflag: '0', - Uname: "shane", - Gname: "staff", - }, - contents: "hello\n", - }, - }, - }, - // This file was produced using gnu tar 1.26 - // echo "Slartibartfast" > file.txt - // ln file.txt hard.txt - // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt - { - file: "testdata/hardlink.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: "file.txt", - Mode: 0644, - Uid: 1000, - Gid: 100, - Size: 15, - ModTime: time.Unix(1425484303, 0), - Typeflag: '0', - Uname: "vbatts", - Gname: "users", - }, - contents: "Slartibartfast\n", - }, - { - header: &Header{ - Name: "hard.txt", - Mode: 0644, - Uid: 1000, - Gid: 100, - Size: 0, - ModTime: time.Unix(1425484303, 0), - Typeflag: '1', - Linkname: "file.txt", - Uname: "vbatts", - Gname: "users", - }, - // no contents - }, - }, - }, -} - // Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. 
func bytestr(offset int, b []byte) string { const rowLen = 32 @@ -228,9 +57,168 @@ func bytediff(a []byte, b []byte) string { } func TestWriter(t *testing.T) { + type entry struct { + header *Header + contents string + } + + vectors := []struct { + file string // filename of expected output + entries []*entry + }{{ + // The writer test file was produced with this command: + // tar (GNU tar) 1.26 + // ln -s small.txt link.txt + // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt + file: "testdata/writer.tar", + entries: []*entry{{ + header: &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1246508266, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Kilts", + }, { + header: &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1245217492, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Google.com\n", + }, { + header: &Header{ + Name: "link.txt", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Size: 0, + ModTime: time.Unix(1314603082, 0), + Typeflag: '2', + Linkname: "small.txt", + Uname: "strings", + Gname: "strings", + }, + // no contents + }}, + }, { + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + file: "testdata/writer-big.tar", + entries: []*entry{{ + header: &Header{ + Name: "tmp/16gig.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 16 << 30, + ModTime: time.Unix(1254699560, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + // fake contents + contents: strings.Repeat("\x00", 4<<10), + }}, + }, { + // This truncated file was produced using this library. + // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. 
+ // dd if=/dev/zero bs=1G count=16 >> writer-big-long.tar + // gnutar -xvf writer-big-long.tar + // bsdtar -xvf writer-big-long.tar + // + // This file is in PAX format. + file: "testdata/writer-big-long.tar", + entries: []*entry{{ + header: &Header{ + Name: strings.Repeat("longname/", 15) + "16gig.txt", + Mode: 0644, + Uid: 1000, + Gid: 1000, + Size: 16 << 30, + ModTime: time.Unix(1399583047, 0), + Typeflag: '0', + Uname: "guillaume", + Gname: "guillaume", + }, + // fake contents + contents: strings.Repeat("\x00", 4<<10), + }}, + }, { + // TODO(dsnet): The Writer output should match the following file. + // To fix an issue (see https://golang.org/issue/12594), we disabled + // prefix support, which alters the generated output. + /* + // This file was produced using gnu tar 1.17 + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + file: "testdata/ustar.tar" + */ + file: "testdata/ustar.issue12594.tar", // This is a valid tar file, but not expected + entries: []*entry{{ + header: &Header{ + Name: strings.Repeat("longname/", 15) + "file.txt", + Mode: 0644, + Uid: 0765, + Gid: 024, + Size: 06, + ModTime: time.Unix(1360135598, 0), + Typeflag: '0', + Uname: "shane", + Gname: "staff", + }, + contents: "hello\n", + }}, + }, { + // This file was produced using gnu tar 1.26 + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + file: "testdata/hardlink.tar", + entries: []*entry{{ + header: &Header{ + Name: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 15, + ModTime: time.Unix(1425484303, 0), + Typeflag: '0', + Uname: "vbatts", + Gname: "users", + }, + contents: "Slartibartfast\n", + }, { + header: &Header{ + Name: "hard.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 0, + ModTime: time.Unix(1425484303, 0), + Typeflag: '1', + Linkname: "file.txt", + Uname: "vbatts", + Gname: "users", + }, + // no contents + }}, + }} + testLoop: - for i, test := range writerTests { - 
expected, err := ioutil.ReadFile(test.file) + for i, v := range vectors { + expected, err := ioutil.ReadFile(v.file) if err != nil { t.Errorf("test %d: Unexpected error: %v", i, err) continue @@ -239,7 +227,7 @@ testLoop: buf := new(bytes.Buffer) tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB big := false - for j, entry := range test.entries { + for j, entry := range v.entries { big = big || entry.header.Size > 1<<10 if err := tw.WriteHeader(entry.header); err != nil { t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err) @@ -576,9 +564,9 @@ func TestWriteAfterClose(t *testing.T) { } func TestSplitUSTARPath(t *testing.T) { - var sr = strings.Repeat + sr := strings.Repeat - var vectors = []struct { + vectors := []struct { input string // Input path prefix string // Expected output prefix suffix string // Expected output suffix @@ -609,114 +597,51 @@ func TestSplitUSTARPath(t *testing.T) { } } -func TestFormatPAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) - - var vectors = []struct { - inputKey string - inputVal string - output string - }{ - {"k", "v", "6 k=v\n"}, - {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, - {"path", longName, "210 path=" + longName + "\n"}, - {"path", medName, "110 path=" + medName + "\n"}, - {"foo", "ba", "9 foo=ba\n"}, - {"foo", "bar", "11 foo=bar\n"}, - {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, - {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, - {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, - {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, - } - - for _, v := range vectors { - output := formatPAXRecord(v.inputKey, v.inputVal) - if output != v.output { - t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", - v.inputKey, v.inputVal, output, v.output) +// TestIssue12594 tests that the Writer does not attempt to populate the prefix +// field when encoding a header in the GNU format. 
The prefix field is valid +// in USTAR and PAX, but not GNU. +func TestIssue12594(t *testing.T) { + names := []string{ + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/333/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/file.txt", + "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/file.txt", + "/home/support/.openoffice.org/3/user/uno_packages/cache/registry/com.sun.star.comp.deployment.executable.PackageRegistryBackend", + } + + for i, name := range names { + var b bytes.Buffer + + tw := NewWriter(&b) + if err := tw.WriteHeader(&Header{ + Name: name, + Uid: 1 << 25, // Prevent USTAR format + }); err != nil { + t.Errorf("test %d, unexpected WriteHeader error: %v", i, err) } - } -} - -func TestFitsInBase256(t *testing.T) { - var vectors = []struct { - input int64 - width int - ok bool - }{ - {+1, 8, true}, - {0, 8, true}, - {-1, 8, true}, - {1 << 56, 8, false}, - {(1 << 56) - 1, 8, true}, - {-1 << 56, 8, true}, - {(-1 << 56) - 1, 8, false}, - {121654, 8, true}, - {-9849849, 8, true}, - {math.MaxInt64, 9, true}, - {0, 9, true}, - {math.MinInt64, 9, true}, - {math.MaxInt64, 12, true}, - {0, 12, true}, - {math.MinInt64, 12, true}, - } - - for _, v := range vectors { - ok := fitsInBase256(v.width, v.input) - if ok != v.ok { - t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + if err := tw.Close(); err != nil { + t.Errorf("test %d, unexpected Close error: %v", i, err) } - } -} -func TestFormatNumeric(t *testing.T) { - var vectors = []struct { - input int64 - output string - ok bool - }{ - // Test base-256 (binary) encoded values. 
- {-1, "\xff", true}, - {-1, "\xff\xff", true}, - {-1, "\xff\xff\xff", true}, - {(1 << 0), "0", false}, - {(1 << 8) - 1, "\x80\xff", true}, - {(1 << 8), "0\x00", false}, - {(1 << 16) - 1, "\x80\xff\xff", true}, - {(1 << 16), "00\x00", false}, - {-1 * (1 << 0), "\xff", true}, - {-1*(1<<0) - 1, "0", false}, - {-1 * (1 << 8), "\xff\x00", true}, - {-1*(1<<8) - 1, "0\x00", false}, - {-1 * (1 << 16), "\xff\x00\x00", true}, - {-1*(1<<16) - 1, "00\x00", false}, - {537795476381659745, "0000000\x00", false}, - {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {-615126028225187231, "0000000\x00", false}, - {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {math.MaxInt64, "0000000\x00", false}, - {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "0000000\x00", false}, - {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - } + // The prefix field should never appear in the GNU format. 
+ var blk block + copy(blk[:], b.Bytes()) + prefix := string(blk.USTAR().Prefix()) + if i := strings.IndexByte(prefix, 0); i >= 0 { + prefix = prefix[:i] // Truncate at the NUL terminator + } + if blk.GetFormat() == formatGNU && len(prefix) > 0 && strings.HasPrefix(name, prefix) { + t.Errorf("test %d, found prefix in GNU format: %s", i, prefix) + } - for _, v := range vectors { - var f formatter - output := make([]byte, len(v.output)) - f.formatNumeric(output, v.input) - ok := (f.err == nil) - if ok != v.ok { - if v.ok { - t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) - } else { - t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) - } + tr := NewReader(&b) + hdr, err := tr.Next() + if err != nil { + t.Errorf("test %d, unexpected Next error: %v", i, err) } - if string(output) != v.output { - t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + if hdr.Name != name { + t.Errorf("test %d, hdr.Name = %s, want %s", i, hdr.Name, name) } } } |