summaryrefslogtreecommitdiff
path: root/workhorse/internal/zipartifacts/open_archive.go
blob: 30b86b66c4950347b71399de6659937354f953ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package zipartifacts

import (
	"archive/zip"
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"strings"
	"time"

	"gitlab.com/gitlab-org/gitlab-workhorse/internal/httprs"

	"gitlab.com/gitlab-org/labkit/correlation"
	"gitlab.com/gitlab-org/labkit/mask"
	"gitlab.com/gitlab-org/labkit/tracing"
)

var httpClient = &http.Client{
	Transport: tracing.NewRoundTripper(correlation.NewInstrumentedRoundTripper(&http.Transport{
		Proxy: http.ProxyFromEnvironment,
		DialContext: (&net.Dialer{
			Timeout:   30 * time.Second,
			KeepAlive: 10 * time.Second,
		}).DialContext,
		IdleConnTimeout:       30 * time.Second,
		TLSHandshakeTimeout:   10 * time.Second,
		ExpectContinueTimeout: 10 * time.Second,
		ResponseHeaderTimeout: 30 * time.Second,
		DisableCompression:    true,
	})),
}

type archive struct {
	reader io.ReaderAt
	size   int64
}

// OpenArchive will open a zip.Reader from a local path or a remote object store URL
// in case of remote url it will make use of ranged requestes to support seeking.
// If the path do not exists error will be ErrArchiveNotFound,
// if the file isn't a zip archive error will be ErrNotAZip
func OpenArchive(ctx context.Context, archivePath string) (*zip.Reader, error) {
	archive, err := openArchiveLocation(ctx, archivePath)
	if err != nil {
		return nil, err
	}

	return openZipReader(archive.reader, archive.size)
}

// OpenArchiveWithReaderFunc opens a zip.Reader from either local path or a
// remote object, similarly to OpenArchive function. The difference is that it
// allows passing a readerFunc that takes a io.ReaderAt that is either going to
// be os.File or a custom reader we use to read from object storage. The
// readerFunc can augment the archive reader and return a type that satisfies
// io.ReaderAt.
func OpenArchiveWithReaderFunc(ctx context.Context, location string, readerFunc func(io.ReaderAt, int64) io.ReaderAt) (*zip.Reader, error) {
	archive, err := openArchiveLocation(ctx, location)
	if err != nil {
		return nil, err
	}

	return openZipReader(readerFunc(archive.reader, archive.size), archive.size)
}

func openArchiveLocation(ctx context.Context, location string) (*archive, error) {
	if isURL(location) {
		return openHTTPArchive(ctx, location)
	}

	return openFileArchive(ctx, location)
}

func isURL(path string) bool {
	return strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://")
}

func openHTTPArchive(ctx context.Context, archivePath string) (*archive, error) {
	scrubbedArchivePath := mask.URL(archivePath)
	req, err := http.NewRequest(http.MethodGet, archivePath, nil)
	if err != nil {
		return nil, fmt.Errorf("can't create HTTP GET %q: %v", scrubbedArchivePath, err)
	}
	req = req.WithContext(ctx)

	resp, err := httpClient.Do(req.WithContext(ctx))
	if err != nil {
		return nil, fmt.Errorf("HTTP GET %q: %v", scrubbedArchivePath, err)
	} else if resp.StatusCode == http.StatusNotFound {
		return nil, ErrorCode[CodeArchiveNotFound]
	} else if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP GET %q: %d: %v", scrubbedArchivePath, resp.StatusCode, resp.Status)
	}

	rs := httprs.NewHttpReadSeeker(resp, httpClient)

	go func() {
		<-ctx.Done()
		resp.Body.Close()
		rs.Close()
	}()

	return &archive{reader: rs, size: resp.ContentLength}, nil
}

func openFileArchive(ctx context.Context, archivePath string) (*archive, error) {
	file, err := os.Open(archivePath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, ErrorCode[CodeArchiveNotFound]
		}
	}

	go func() {
		<-ctx.Done()
		// We close the archive from this goroutine so that we can safely return a *zip.Reader instead of a *zip.ReadCloser
		file.Close()
	}()

	stat, err := file.Stat()
	if err != nil {
		return nil, err
	}

	return &archive{reader: file, size: stat.Size()}, nil
}

func openZipReader(archive io.ReaderAt, size int64) (*zip.Reader, error) {
	reader, err := zip.NewReader(archive, size)
	if err != nil {
		return nil, ErrorCode[CodeNotZip]
	}

	return reader, nil
}