summaryrefslogtreecommitdiff
path: root/lib/chef/cookbook/synchronizer.rb
blob: b1fa1b436dae1db343c353ec322cd3a2f717cf01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# License:: Apache License, Version 2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

require_relative "../client"
require_relative "../util/threaded_job_queue"
require_relative "../server_api"
require "singleton"
require_relative "../dist"

class Chef

  # Tracks the cache filenames touched during a run, whether they were
  # downloaded eagerly (during sync_cookbooks) or lazily (during the run
  # itself, through FileVendor), so that cached cookbook files nobody marked
  # as valid can be deleted once the run has finished.
  class CookbookCacheCleaner
    include Singleton

    # When truthy, cleanup_file_cache leaves the cache untouched.
    attr_accessor :skip_removal

    # Forget the previously recorded valid entries whenever a client run
    # starts.
    Chef::Client.when_run_starts do |_run_status|
      instance.reset!
    end

    # Purge unused cookbook files once a client run completes successfully.
    Chef::Client.when_run_completes_successfully do |_run_status|
      instance.cleanup_file_cache
    end

    def initialize
      reset!
    end

    # Drops every recorded valid cache entry.
    def reset!
      @valid_cache_entries = {}
    end

    # Records +cache_path+ as in use so that cleanup_file_cache keeps it.
    def mark_file_as_valid(cache_path)
      @valid_cache_entries[cache_path] = true
    end

    # The cache backend that is searched and pruned.
    def cache
      Chef::FileCache
    end

    # Deletes every cached cookbook file that was not marked valid. Nothing is
    # deleted when Chef::Config[:solo_legacy_mode] or skip_removal is set; an
    # info message is logged instead.
    def cleanup_file_cache
      if Chef::Config[:solo_legacy_mode] || skip_removal
        Chef::Log.info("Skipping removal of unused files from the cache")
      else
        # Anything found under cookbooks/ that was never marked valid is
        # treated as stale.
        cache.find(File.join(%w{cookbooks ** {*,.*}})).each do |cache_filename|
          next if @valid_cache_entries[cache_filename]

          Chef::Log.info("Removing #{cache_filename} from the cache; it is no longer needed by #{Chef::Dist::CLIENT}.")
          cache.delete(cache_filename)
        end
      end
    end

  end

  # Synchronizes the locally cached copies of cookbooks with the files on the
  # server.
  #
  # Files are split into two groups: "eager" files, which sync_cookbooks
  # downloads up front, and "lazy" files (the +files+ and +templates+
  # segments, unless Chef::Config[:no_lazy_load] is set), which are only
  # recorded in the manifest here.
  class CookbookSynchronizer
    # Pairs a manifest record with the cookbook it belongs to.
    CookbookFile = Struct.new(:cookbook, :manifest_record)

    # When false, clear_obsoleted_cookbooks keeps cached files of cookbooks
    # that are not part of this run (defaults to true).
    attr_accessor :remove_obsoleted_files

    # === Arguments
    # cookbooks_by_name:: Hash-like mapping of cookbook name => cookbook
    # events:: receiver of the cookbook_* / *_cookbook_file notifications
    def initialize(cookbooks_by_name, events)
      @cookbooks_by_name = cookbooks_by_name
      @events = events

      @cookbook_full_file_paths = {}
      @remove_obsoleted_files = true

      @lazy_files = {}
    end

    # The cache backend used for every lookup, load and delete.
    def cache
      Chef::FileCache
    end

    def cookbook_names
      @cookbooks_by_name.keys
    end

    def cookbooks
      @cookbooks_by_name.values
    end

    def cookbook_count
      @cookbooks_by_name.size
    end

    def have_cookbook?(cookbook_name)
      @cookbooks_by_name.key?(cookbook_name)
    end

    # Manifest records of +segment+ for the named cookbook.
    def cookbook_segment(cookbook_name, segment)
      @cookbooks_by_name[cookbook_name].files_for(segment)
    end

    # The files that must be synced eagerly, as CookbookFile structs.
    #
    # A manifest record whose first path component (taken from
    # record[:name]) is a lazy segment is flagged with :lazy => true and
    # collected in @lazy_files instead of being returned. The result is
    # memoized, so the manifests are only walked once.
    def files
      @files ||= begin
        lazy_segments = Chef::Config[:no_lazy_load] ? [] : %w{ files templates }

        cookbooks.each_with_object([]) do |cookbook, eager_files|
          cookbook.each_file do |manifest_record|
            top_level_dir = manifest_record[:name].split("/")[0]
            if lazy_segments.include?(top_level_dir)
              manifest_record[:lazy] = true
              (@lazy_files[cookbook] ||= []) << manifest_record
            else
              eager_files << CookbookFile.new(cookbook, manifest_record)
            end
          end
        end
      end
    end

    # Eager files grouped by their cookbook.
    def files_by_cookbook
      files.group_by(&:cookbook)
    end

    # Memoized cookbook => number of eager files still waiting to be synced.
    # mark_file_synced decrements these counters.
    def files_remaining_by_cookbook
      @files_remaining_by_cookbook ||=
        files_by_cookbook.each_with_object({}) do |(cookbook, cookbook_files), remaining|
          remaining[cookbook] = cookbook_files.size
        end
    end

    # Records that +file+ has been synced and fires the synchronized_cookbook
    # event once the last eager file of its cookbook is done.
    def mark_file_synced(file)
      cookbook = file.cookbook
      remaining = (files_remaining_by_cookbook[cookbook] -= 1)

      @events.synchronized_cookbook(cookbook.name, cookbook) if remaining == 0
    end

    # Synchronizes all the cookbooks from the chef-server.
    #
    # === Returns
    # true:: when the synchronization completed
    # === Raises
    # Any exception raised while syncing is reported through
    # @events.cookbook_sync_failed and then re-raised unchanged.
    def sync_cookbooks
      cookbook_list = cookbooks.map { |ckbk| "#{ckbk.name}@#{ckbk.version}" }.join(", ")
      Chef::Log.info("Loading cookbooks [#{cookbook_list}]")
      Chef::Log.trace("Cookbooks detail: #{cookbooks.inspect}")

      clear_obsoleted_cookbooks

      queue = Chef::Util::ThreadedJobQueue.new

      Chef::Log.warn("skipping cookbook synchronization! DO NOT LEAVE THIS ENABLED IN PRODUCTION!!!") if Chef::Config[:skip_cookbook_sync]
      files.each do |file|
        queue << lambda do |lock|
          full_file_path = sync_file(file)

          # The bookkeeping touches state shared by all jobs, so it is done
          # while holding the lock handed to the job.
          lock.synchronize do
            # Save the full_path of the downloaded file to be restored in the manifest later
            save_full_file_path(file, full_file_path)
            mark_file_synced(file)
          end
        end
      end

      @events.cookbook_sync_start(cookbook_count)
      queue.process(Chef::Config[:cookbook_sync_threads])
      # Ensure that cookbooks know where they're rooted at, for manifest purposes.
      ensure_cookbook_paths
      # Update the full file paths in the manifest
      update_cookbook_filenames

    rescue Exception => e
      # Deliberately broad: the failure is only reported here and is always
      # re-raised, never swallowed.
      @events.cookbook_sync_failed(cookbooks, e)
      raise
    else
      @events.cookbook_sync_complete
      true
    end

    # Saves the full_path to the file of the cookbook to be updated
    # in the manifest later
    def save_full_file_path(file, full_path)
      (@cookbook_full_file_paths[file.cookbook] ||= []) << full_path
    end

    # remove cookbooks that are not referenced in the expanded run_list at all
    # (if we have an override run_list we may not want to do this)
    #
    # A cached path that does not start with "cookbooks/<name>/" yields a nil
    # cookbook name and is removed unless a cookbook is registered under nil.
    def remove_old_cookbooks
      cache.find(File.join(%w{cookbooks ** {*,.*}})).each do |cache_file|
        cookbook_name = cache_file[%r{^cookbooks/([^/]+)/}, 1]
        next if have_cookbook?(cookbook_name)

        Chef::Log.info("Removing #{cache_file} from the cache; its cookbook is no longer needed on this client.")
        cache.delete(cache_file)
        @events.removed_cookbook_file(cache_file)
      end
    end

    # remove deleted files in cookbooks that are being used on the node
    #
    # Only cached paths of the form cookbooks/<name>/<segment>/<file> are
    # considered; a file is removed when its cookbook is known but the
    # segment's manifest has no record whose "path" is "<segment>/<file>".
    def remove_deleted_files
      cache.find(File.join(%w{cookbooks ** {*,.*}})).each do |cache_file|
        md = cache_file.match(%r{^cookbooks/([^/]+)/([^/]+)/(.*)})
        next unless md

        cookbook_name, segment, file = md.captures
        next unless have_cookbook?(cookbook_name)

        manifest_path = "#{segment}/#{file}"
        in_manifest = cookbook_segment(cookbook_name, segment).any? do |manifest_record|
          manifest_record["path"] == manifest_path
        end
        next if in_manifest

        Chef::Log.info("Removing #{cache_file} from the cache; it is no longer in the cookbook manifest.")
        cache.delete(cache_file)
        @events.removed_cookbook_file(cache_file)
      end
    end

    # Cleans the cookbook cache before a sync: removes files of cookbooks that
    # are not part of this run (unless remove_obsoleted_files is false, in
    # which case CookbookCacheCleaner is told to skip its removal as well) and
    # then removes files that disappeared from the manifests of cookbooks
    # that are in use.
    def clear_obsoleted_cookbooks
      @events.cookbook_clean_start

      if remove_obsoleted_files
        remove_old_cookbooks
      else
        Chef::Log.info("Skipping removal of obsoleted cookbooks from the cache")
        CookbookCacheCleaner.instance.skip_removal = true
      end

      remove_deleted_files

      @events.cookbook_clean_complete
    end

    # Pushes the results of the sync back into the cookbooks: the full paths
    # saved via save_full_file_path become each cookbook's all_files, and the
    # lazy manifest records are added to each cookbook's manifest.
    def update_cookbook_filenames
      @cookbook_full_file_paths.each do |cookbook, full_paths|
        cookbook.all_files = full_paths
      end

      @lazy_files.each do |cookbook, lazy_files|
        cookbook.cookbook_manifest.add_files_to_manifest(lazy_files)
      end
    end

    # Roots every cookbook at <file_cache_path>/cookbooks/<cookbook name>.
    def ensure_cookbook_paths
      cookbooks.each do |cookbook|
        cb_dir = File.join(Chef::Config[:file_cache_path], "cookbooks", cookbook.name)
        cookbook.root_paths = [cb_dir]
      end
    end

    # Sync an individual file if needed. If there is an up to date copy
    # locally, nothing is downloaded. The cache entry is marked as valid
    # either way so that it survives the post-run cache cleanup.
    #
    # === Arguments
    # file<CookbookFile>
    # === Returns
    # Full path to the cached file as a String
    def sync_file(file)
      cache_filename = File.join("cookbooks", file.cookbook.name, file.manifest_record["path"])
      mark_cached_file_valid(cache_filename)

      # Download only when the on-disk (current) checksum differs from the
      # on-server one (remote, per manifest), which includes the case where
      # there is no cached copy at all.
      if cached_copy_up_to_date?(cache_filename, file.manifest_record["checksum"])
        Chef::Log.trace("Not storing #{cache_filename}, as the cache is up to date.")
      else
        download_file(file.manifest_record["url"], cache_filename)
        @events.updated_cookbook_file(file.cookbook.name, cache_filename)
      end

      # Load the file in the cache and return the full file path to the loaded file
      cache.load(cache_filename, false)
    end

    # Whether the cached copy at +local_path+ can be used as is.
    #
    # Always true when Chef::Config[:skip_cookbook_sync] is set; otherwise
    # true only if the cache has the entry and its checksum equals
    # +expected_checksum+.
    def cached_copy_up_to_date?(local_path, expected_checksum)
      return true if Chef::Config[:skip_cookbook_sync]
      return false unless cache.key?(local_path)

      expected_checksum == CookbookVersion.checksum_cookbook_file(cache.load(local_path, false))
    end

    # Unconditionally download the file from the given URL. File will be
    # downloaded to the path +destination+ which is relative to the Chef file
    # cache root.
    def download_file(url, destination)
      raw_file = server_api.streaming_request(url)

      Chef::Log.info("Storing updated #{destination} in the cache.")
      cache.move_to(raw_file.path, destination)
    end

    # Marks the given file as valid (non-stale).
    def mark_cached_file_valid(cache_filename)
      CookbookCacheCleaner.instance.mark_file_as_valid(cache_filename)
    end

    # One Chef::ServerAPI client per thread, kept in
    # Thread.current[:server_api] and created on first use.
    def server_api
      Thread.current[:server_api] ||= Chef::ServerAPI.new(Chef::Config[:chef_server_url], keepalives: true)
    end

  end
end