summaryrefslogtreecommitdiff
path: root/lib/backup/files.rb
blob: 0f6ed847dea0fb5d6ab6b687e066ec169632b9d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# frozen_string_literal: true

require 'open3'
require_relative 'helper'

module Backup
  class Files
    include Backup::Helper

    DEFAULT_EXCLUDE = 'lost+found'

    attr_reader :name, :app_files_dir, :backup_tarball, :excludes, :files_parent_dir

    # Set up a backup task for one directory of application files.
    #
    # name          - base name of the tarball; '.tar.gz' is appended to it.
    # app_files_dir - directory to back up; resolved with File.realpath, so it
    #                 must already exist.
    # excludes:     - additional entries to exclude; they are placed after
    #                 DEFAULT_EXCLUDE in the resulting list.
    def initialize(name, app_files_dir, excludes: [])
      @name = name

      # Resolve the source directory and its parent before touching the backup
      # configuration.
      @app_files_dir = File.realpath(app_files_dir)
      @files_parent_dir = File.realpath(File.join(@app_files_dir, '..'))

      # Both the staging directory and the tarball live directly under the
      # configured backup path.
      backup_root = Gitlab.config.backup.path
      @backup_files_dir = File.join(backup_root, File.basename(@app_files_dir))
      @backup_tarball = File.join(backup_root, name + '.tar.gz')

      @excludes = [DEFAULT_EXCLUDE] + excludes
    end

    # Archive app_files_dir into backup_tarball (a gzipped tar in the backup path).
    #
    # Any existing tarball with the same name is removed first. When
    # ENV['STRATEGY'] is 'copy', the files are rsynced into the backup path and
    # the archive is built from that copy, which is deleted afterwards;
    # otherwise the archive is built straight from app_files_dir.
    #
    # Raises Backup::Error when rsync fails or when pipeline_succeeded? rejects
    # the outcome of the tar | gzip pipeline.
    def dump
      backup_root = Gitlab.config.backup.path
      FileUtils.mkdir_p(backup_root)
      FileUtils.rm_f(backup_tarball)

      use_copy = ENV['STRATEGY'] == 'copy'

      if use_copy
        rsync_cmd = [%w[rsync -a --delete], exclude_dirs(:rsync), %W[#{app_files_dir} #{backup_root}]].flatten
        output, status = Gitlab::Popen.popen(rsync_cmd)

        # Status 24 is retried exactly once: source files vanished during the copy.
        if status == 24
          $stdout.puts "Warning: files vanished during rsync, retrying..."
          output, status = Gitlab::Popen.popen(rsync_cmd)
        end

        if status != 0
          puts output
          raise Backup::Error, 'Backup failed'
        end
      end

      # Archive the rsynced copy when there is one, the live directory otherwise.
      source_dir = use_copy ? @backup_files_dir : app_files_dir
      tar_cmd = [tar, exclude_dirs(:tar), %W[-C #{source_dir} -cf - .]].flatten
      status_list, output = run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0o600])

      # The staging copy is removed before the pipeline result is inspected, so
      # it is cleaned up even when the archive turns out to be unusable.
      FileUtils.rm_rf(@backup_files_dir) if use_copy

      return if pipeline_succeeded?(tar_status: status_list[0], gzip_status: status_list[1], output: output)

      raise Backup::Error, "Backup operation failed: #{output}"
    end

    # Replace the contents of app_files_dir with the contents of backup_tarball.
    #
    # The current contents are moved aside first (backup_existing_files_dir),
    # then the tarball is streamed through `gzip -cd` into tar, which extracts
    # into app_files_dir.
    #
    # Raises Backup::Error when pipeline_succeeded? rejects the pipeline outcome.
    def restore
      backup_existing_files_dir

      decompress_cmd = %w[gzip -cd]
      extract_cmd = [tar, '--unlink-first', '--recursive-unlink', '-C', app_files_dir, '-xf', '-']

      status_list, output = run_pipeline!([decompress_cmd, extract_cmd], in: backup_tarball)

      # Here gzip runs first in the pipeline, so its status is the first entry.
      return if pipeline_succeeded?(gzip_status: status_list[0], tar_status: status_list[1], output: output)

      raise Backup::Error, "Restore operation failed: #{output}"
    end

    # Name of the tar executable to invoke.
    #
    # Returns 'gtar' when `gtar --version` can be run successfully (taken as a
    # sign that GNU tar is installed under that name), and 'tar' otherwise.
    # The probe's standard output is discarded.
    def tar
      gtar_available = system('gtar', '--version', out: '/dev/null')

      gtar_available ? 'gtar' : 'tar'
    end

    # Move the current contents of app_files_dir out of the way.
    #
    # Every entry in app_files_dir, dotfiles included, is moved into
    # <backup path>/tmp/<name>.<unix timestamp>. Nothing happens when
    # app_files_dir does not exist.
    #
    # Errno::EACCES and Errno::EBUSY raised by the move are handed to
    # access_denied_error / resource_busy_error, which are defined outside this
    # file (presumably in Backup::Helper - confirm).
    def backup_existing_files_dir
      timestamped_files_path = File.join(Gitlab.config.backup.path, "tmp", "#{name}.#{Time.now.to_i}")
      return unless File.exist?(app_files_dir)

      FileUtils.mkdir_p(timestamped_files_path, mode: 0o700)

      # FNM_DOTMATCH makes "*" match hidden entries too; the "." and ".."
      # pseudo-entries must not be moved, so they are subtracted again.
      dot_entries = [File.join(app_files_dir, "."), File.join(app_files_dir, "..")]
      entries = Dir.glob(File.join(app_files_dir, "*"), File::FNM_DOTMATCH) - dot_entries

      begin
        FileUtils.mv(entries, timestamped_files_path)
      rescue Errno::EACCES
        access_denied_error(app_files_dir)
      rescue Errno::EBUSY
        resource_busy_error(app_files_dir)
      end
    end

    # Run cmd_list as a shell-style pipeline and collect everything the
    # commands write to standard error.
    #
    # cmd_list - array of commands, each in a form accepted by Open3.pipeline_start.
    # options  - spawn options applied to the pipeline (e.g. :in / :out
    #            redirections). The hash is not modified; an :err entry, if
    #            present, is overridden so stderr can be captured.
    #
    # Returns [status_list, stderr_output], where status_list holds one
    # Process::Status per command, in pipeline order.
    def run_pipeline!(cmd_list, options = {})
      err_r, err_w = IO.pipe

      # merge (rather than options[:err] = ...) leaves the caller's hash alone.
      Open3.pipeline_start(*cmd_list, options.merge(err: err_w)) do |wait_threads|
        # Drop our copy of the write end so the read below sees EOF as soon as
        # the last child has exited.
        err_w.close

        # Drain stderr while the commands are still running. Waiting for them
        # first would deadlock once a command fills the pipe buffer and blocks
        # on its next write to stderr.
        stderr_output = err_r.read

        [wait_threads.map(&:value), stderr_output]
      end
    ensure
      # Release both pipe ends on every path, including a failed spawn.
      [err_r, err_w].each { |io| io.close if io && !io.closed? }
    end

    # Whether the given tar output contains a warning that is known to be
    # harmless.
    #
    # warning - text written by tar to standard error; may span several lines,
    #           each pattern is tried against every line.
    #
    # Returns true if any known pattern matches, false otherwise.
    def noncritical_warning?(warning)
      harmless_patterns = [
        /^g?tar: \.: Cannot mkdir: No such file or directory$/
      ]

      harmless_patterns.any? { |pattern| pattern.match?(warning) }
    end

    # Decide whether a tar/gzip pipeline run counts as successful.
    #
    # tar_status:  - Process::Status of the tar command, or nil if unavailable.
    # gzip_status: - Process::Status of the gzip command, or nil if unavailable.
    # output:      - text captured from the pipeline's standard error.
    #
    # gzip has to succeed outright. tar either succeeds, or fails in a way that
    # tar_ignore_non_success? considers tolerable. A missing status for either
    # command is treated as a failure.
    #
    # Returns a truthy value on success, a falsy one otherwise.
    def pipeline_succeeded?(tar_status:, gzip_status:, output:)
      return false unless gzip_status&.success?

      # Without a tar status there is no exit code to evaluate; report failure
      # rather than calling exitstatus on nil.
      return false unless tar_status

      tar_status.success? || tar_ignore_non_success?(tar_status.exitstatus, output)
    end

    # Whether a non-zero tar exit status may be treated as success.
    #
    # tar can exit with a non-zero code in situations that do not invalidate
    # the archive:
    #  1 - some files changed while being read (e.g. a CI job is currently
    #      writing to its log)
    #  2 - it could not create the `.` directory
    #      (see issue https://gitlab.com/gitlab-org/gitlab/-/issues/22442)
    #  http://www.gnu.org/software/tar/manual/html_section/tar_19.html#Synopsis
    #
    # Exit status 1 is always tolerated. Any other status is tolerated only if
    # the captured output contains a known non-critical warning. The reason for
    # tolerating a failure is printed to standard output.
    #
    # Returns true when the failure can be ignored, false otherwise.
    def tar_ignore_non_success?(exitstatus, output)
      ignore_reason =
        if exitstatus == 1
          "Ignoring tar exit status 1 'Some files differ': #{output}"
        elsif noncritical_warning?(output)
          "Ignoring non-success exit status #{exitstatus} due to output of non-critical warning(s): #{output}"
        end

      return false unless ignore_reason

      $stdout.puts ignore_reason
      true
    end

    # Build the --exclude arguments for the given tool.
    #
    # fmt - :rsync or :tar.
    #
    # DEFAULT_EXCLUDE is passed as a bare pattern. Every other entry is
    # prefixed with "/" for rsync and "./" for tar.
    #
    # Returns an array of "--exclude=..." strings, one per entry of excludes.
    # Raises ArgumentError if a prefixed entry is needed and fmt is neither
    # :rsync nor :tar (previously this put nil into the argument list).
    def exclude_dirs(fmt)
      excludes.map do |s|
        next "--exclude=#{s}" if s == DEFAULT_EXCLUDE

        case fmt
        when :rsync then "--exclude=/#{s}"
        when :tar then "--exclude=./#{s}"
        else
          raise ArgumentError, "unsupported exclude format: #{fmt.inspect}"
        end
      end
    end
  end
end