1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
#
# Author:: Daniel DeLeo (<dan@opscode.com>)
# Author:: Jesse Campbell (<hikeit@gmail.com>)
# Author:: Lamont Granquist (<lamont@opscode.com>)
# Copyright:: Copyright (c) 2013 Jesse Campbell
# Copyright:: Copyright (c) 2013 Opscode, Inc.
# License:: Apache License, Version 2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require 'stringio'
require 'chef/file_cache'
require 'chef/json_compat'
require 'chef/digester'
require 'chef/exceptions'
class Chef
  class Provider
    class RemoteFile

      # == CacheControlData
      # Implements per-URI storage of cache control data for a remote resource
      # along with a sanity-check checksum of the file in question.
      # Provider::RemoteFile protocol implementation classes can use this
      # information to avoid re-fetching files when the current copy is up to
      # date. The way this information is used is protocol-dependent. For HTTP,
      # this information is sent to the origin server via headers to make a
      # conditional GET request.
      #
      # == API
      # The general shape of the API is active-record-the-pattern-like. New
      # instances should be instantiated via
      # `CacheControlData.load_and_validate`, which will do a find-or-create
      # operation and then sanity check the data against the checksum of the
      # current copy of the file. If there is no data or the sanity check
      # fails, the `etag` and `mtime` attributes will be set to nil; otherwise
      # they are populated with the previously saved values.
      #
      # After fetching a file, the CacheControlData instance should be updated
      # with new etag, mtime and checksum values in whatever format is
      # preferred by the protocol used. Then call #save to save the data to disk.
      class CacheControlData

        # Find-or-create entry point. Loads any previously saved cache
        # control data for +uri+ and validates it against
        # +current_copy_checksum+ (the SHA-256 of the local copy, or nil if
        # there is no local copy). On validation failure the returned
        # instance has nil #etag and #mtime.
        #
        # Returns the CacheControlData instance (never nil).
        def self.load_and_validate(uri, current_copy_checksum)
          ccdata = new(uri)
          ccdata.load
          ccdata.validate!(current_copy_checksum)
          ccdata
        end

        # Entity Tag of the resource. HTTP-specific. See also:
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.2
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19
        attr_accessor :etag

        # Last modified time of the remote resource. Different protocols will
        # use different types for this field (e.g., string representation of a
        # specific date format, integer, etc.) For HTTP-specific references,
        # see:
        # * http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3
        # * http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.1
        # * http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.25
        attr_accessor :mtime

        # SHA2-256 Hash of the file as last fetched.
        attr_accessor :checksum

        # URI of the resource as a String. This is the "primary key" used for
        # storage and retrieval. Any password component is masked (see
        # #initialize) before being stored here.
        attr_reader :uri

        # +uri+ is a URI-like object (must respond to #dup, #userinfo,
        # #password= and #to_s). The password component, if any, is replaced
        # with "XXXX" so credentials are never written to the cache key or to
        # disk.
        def initialize(uri)
          uri = uri.dup
          uri.password = "XXXX" unless uri.userinfo.nil?
          @uri = uri.to_s
        end

        # Loads previously saved cache control data for #uri from
        # Chef::FileCache and applies it to this instance.
        #
        # Returns self when data was found, false otherwise.
        def load
          if previous_cc_data = load_data
            apply(previous_cc_data)
            self
          else
            false
          end
        end

        # Sanity-checks the loaded data against +current_copy_checksum+.
        # When the checksums disagree (or there is no local copy), the cached
        # etag/mtime are stale and are cleared via #reset!.
        #
        # Returns true when the cached data is valid, false otherwise.
        def validate!(current_copy_checksum)
          if current_copy_checksum.nil? || checksum != current_copy_checksum
            reset!
            false
          else
            true
          end
        end

        # Saves the data to disk using Chef::FileCache. The filename is a
        # sanitized version of the URI with a SHA-256 fragment of the same URI
        # appended (to avoid collisions between different URIs having the same
        # sanitized form).
        def save
          Chef::FileCache.store("remote_file/#{sanitized_cache_file_basename}", json_data)
        end

        # :nodoc:
        # JSON representation of this object for storage.
        def json_data
          Chef::JSONCompat.to_json(hash_data)
        end

        private

        # The serializable form of this object: only etag, mtime and checksum
        # are persisted.
        def hash_data
          as_hash = {}
          as_hash["etag"] = etag
          as_hash["mtime"] = mtime
          as_hash["checksum"] = checksum
          as_hash
        end

        # Clears the stale etag/mtime after a failed validation.
        # NOTE(review): @checksum is deliberately left untouched here —
        # callers appear to overwrite it after re-fetching, but confirm
        # before changing this.
        def reset!
          @etag, @mtime = nil, nil
        end

        # Copies previously persisted values onto this instance.
        def apply(previous_cc_data)
          @etag = previous_cc_data["etag"]
          @mtime = previous_cc_data["mtime"]
          @checksum = previous_cc_data["checksum"]
        end

        # Reads and parses the persisted JSON for #uri.
        #
        # Returns the parsed Hash, or false when no usable data exists
        # (missing cache file or unparseable JSON).
        def load_data
          Chef::JSONCompat.parse(load_json_data)
        rescue Chef::Exceptions::FileNotFound, Chef::Exceptions::JSON::ParseError
          false
        end

        # Loads the raw JSON for #uri from Chef::FileCache, transparently
        # migrating cache files written under the legacy MD5-based name to
        # the current SHA-256-based name.
        #
        # Raises Chef::Exceptions::FileNotFound when neither name exists.
        def load_json_data
          path = sanitized_cache_file_path(sanitized_cache_file_basename)
          if Chef::FileCache.has_key?(path)
            Chef::FileCache.load(path)
          else
            old_path = sanitized_cache_file_path(sanitized_cache_file_basename_md5)
            if Chef::FileCache.has_key?(old_path)
              # We found an old cache control data file. We started using sha256 instead of md5
              # to name these. Upgrade the file to the new name.
              Chef::Log.debug("Found old cache control data file at #{old_path}. Moving to #{path}.")
              Chef::FileCache.load(old_path).tap do |data|
                Chef::FileCache.store(path, data)
                Chef::FileCache.delete(old_path)
              end
            else
              raise Chef::Exceptions::FileNotFound
            end
          end
        end

        # FileCache-relative path for a given basename.
        def sanitized_cache_file_path(basename)
          "remote_file/#{basename}"
        end

        # Scrub and truncate in accordance with the goals of keeping the name
        # human-readable but within the bounds of local file system
        # path length limits
        def scrubbed_uri
          uri.gsub(/\W/, '_')[0..63]
        end

        # Current cache file basename: scrubbed URI plus the first 32 hex
        # chars of the URI's SHA-256.
        def sanitized_cache_file_basename
          uri_sha2 = Chef::Digester.instance.generate_checksum(StringIO.new(uri))
          cache_file_basename(uri_sha2[0,32])
        end

        # Old way of creating the file basename (MD5-based); kept only so
        # #load_json_data can find and migrate legacy cache files.
        def sanitized_cache_file_basename_md5
          uri_md5 = Chef::Digester.instance.generate_md5_checksum(StringIO.new(uri))
          cache_file_basename(uri_md5)
        end

        def cache_file_basename(checksum)
          "#{scrubbed_uri}-#{checksum}.json"
        end
      end
    end
  end
end
|