summaryrefslogtreecommitdiff
path: root/lorry
diff options
context:
space:
mode:
authorBen Hutchings <ben.hutchings@codethink.co.uk>2020-08-06 16:37:41 +0100
committerBen Hutchings <ben.hutchings@codethink.co.uk>2020-08-10 19:38:24 +0100
commit50383f3bcce75d897760bb2c606ffea865f352da (patch)
tree0b77f19151f28ae227f33187121102aa9984202e /lorry
parente24858ad11582082f0a329650325c1f8b0fda277 (diff)
downloadlorry-50383f3bcce75d897760bb2c606ffea865f352da.tar.gz
lorry: Crash-proof updates to working git repos with A/B switching
lorry currently updates a git repository in its working area like this: 1. Delete old backup and debug repositories 2. Copy active repository (git) to backup (git-pre-update) 3. Perform conversion in active repository 4. If conversion failed, restore the backup: 1. Copy active repository to debug (git-post-fail) 2. Delete active repository 3. Copy backup repository to active 5. If conversion succeeded: delete git-pre-update If step 3 or 4 is interrupted, for example because of a system reboot, the active repository may be left missing or broken. On the next update, the backup will be deleted. Replace this with the following process: 1. Look for repositories 'git-a' and 'git-b'. The one with a higher update count (or the only one that exists and has an update count file) is the active repository. The other is the temporary repository. In case neither exists, 'git-a' is the temporary repository. 2. Delete old temporary repository, if it exists 3. Copy active repository, if it exists, to temporary repository (excluding the update count file) 4. Perform conversion in temporary repository 5. If conversion succeeded, make the temporary repository active by writing the new update count file 6. If conversion failed, move the temporary repository to debug as before We need to migrate the working state, so if there is a 'git' and no 'git-a', add an update count of 1 to 'git' and move it to 'git-a'. Also delete the old-style backup repository. Update test cases to expect the new filenames. lorry should now be crash-proof, in the sense that killing it at any point will leave the last successful update in place. However, an unclean system shutdown (e.g. kernel crash) that does not flush all writes to persistent storage may still result in the active repository being left broken. Closes: #8.
Diffstat (limited to 'lorry')
-rwxr-xr-xlorry145
1 files changed, 98 insertions, 47 deletions
diff --git a/lorry b/lorry
index b42861b..65f1cb3 100755
--- a/lorry
+++ b/lorry
@@ -41,6 +41,9 @@ __version__ = '0.0'
lorry_path = os.path.realpath(__file__)
+UPDATE_COUNT_NAME = 'lorry-update-count'
+
+
def file_missing_or_empty(filename):
''' A more comprehensive alternative to os.path.exists(). '''
return (not os.path.isfile(filename)) or (os.path.getsize(filename) <= 0)
@@ -343,15 +346,12 @@ class Lorry(cliapp.Application):
dirname = self.dirname(name)
if not os.path.exists(dirname):
os.mkdir(dirname)
- gitdir = os.path.join(dirname, 'git')
-
- time = datetime.now().strftime('%F-%T')
- pre_update_name = 'git-pre-update'
- pre_update_backup_suffix = (pre_update_name + '-' + time
- if self.settings['keep-multiple-backups'] else pre_update_name)
+ self.migrate_oldstyle_repos(dirname)
+ temp_repo, active_repo, next_update_count = \
+ self.prepare_working_repos(dirname)
- pre_update_backup_dir = os.path.join(dirname, pre_update_backup_suffix)
+ time = datetime.now().strftime('%F-%T')
post_fail_name = 'git-post-fail'
post_fail_backup_suffix = (post_fail_name + '-' + time
@@ -360,79 +360,130 @@ class Lorry(cliapp.Application):
post_fail_backup_dir = os.path.join(dirname, post_fail_backup_suffix)
if not self.settings['keep-multiple-backups']:
- # remove previous backups if they exist
- if os.path.exists(pre_update_backup_dir):
- shutil.rmtree(pre_update_backup_dir)
-
if os.path.exists(post_fail_backup_dir):
shutil.rmtree(post_fail_backup_dir)
- backupdir = self.backup_gitdir(name, gitdir, pre_update_backup_dir)
-
try:
self.needs_aggressive = False
- table[vcstype](name, dirname, gitdir, spec)
+ table[vcstype](name, dirname, temp_repo, spec)
if self.settings['repack']:
self.progress('.. repacking %s git repository' % name)
self.run_program(['git', 'config',
- 'pack.windowMemory', '128M'], cwd=gitdir)
+ 'pack.windowMemory', '128M'], cwd=temp_repo)
args = ['git', 'gc']
if self.needs_aggressive:
args += ['--aggressive']
- self.run_program(args, cwd=gitdir)
+ self.run_program(args, cwd=temp_repo)
- self.bundle(name, gitdir)
- self.make_tarball(name, gitdir)
+ self.bundle(name, temp_repo)
+ self.make_tarball(name, temp_repo)
+
+ self.write_update_count(temp_repo, next_update_count)
+ active_repo = temp_repo
except:
- if backupdir is not None:
- faildir = self.backup_gitdir(name, gitdir, post_fail_backup_dir)
- self.restore_backup(name, backupdir, gitdir)
+ if active_repo is not None:
+ os.rename(temp_repo, post_fail_backup_dir)
self.output.write('Mirror of %s failed, state before mirror '
'is saved at %s and state after mirror is '
- 'saved at %s\n' % (name, backupdir, faildir))
+ 'saved at %s\n'
+ % (name, active_repo, post_fail_backup_dir))
logging.debug('Mirror of %s failed, state before mirror '
'is saved at %s and state after mirror is '
- 'saved at %s\n', name, backupdir, faildir)
+ 'saved at %s\n',
+ name, active_repo, post_fail_backup_dir)
raise
if not self.settings['pull-only']:
if len(self.settings['mirror-base-url-push']) > 0:
if 'refspecs' in spec:
- self.push_to_mirror_server(name, gitdir, spec['refspecs'])
+ self.push_to_mirror_server(name, active_repo, spec['refspecs'])
else:
- self.push_to_mirror_server(name, gitdir)
-
- if backupdir is not None:
- self.progress('.. removing %s git repository backup' % name)
- shutil.rmtree(backupdir)
-
- def restore_backup(self, name, backupdir, gitdir):
- self.progress('.. restoring %s good git repository' % name)
- dotgit = os.path.join(gitdir, '.git')
- if not os.path.exists(dotgit):
- dotgit = gitdir
- shutil.rmtree(dotgit)
- self.copy_gitdir(backupdir, dotgit)
+ self.push_to_mirror_server(name, active_repo)
+
+ def migrate_oldstyle_repos(self, dirname):
+ # Migrate old-style active repository
+ old_repo = os.path.join(dirname, 'git')
+ if os.path.exists(old_repo):
+ new_repo = os.path.join(dirname, 'git-a')
+ if os.path.exists(new_repo):
+ msg = ('Found both old %s and new %s directories; '
+ 'not migrating\n' % (old_repo, new_repo))
+ self.output.write(msg)
+ logging.warning(msg)
+ else:
+ # If it has a .git subdirectory, use that
+ old_gitdir = os.path.join(old_repo, '.git')
+ if not os.path.exists(old_gitdir):
+ old_gitdir = old_repo
+
+ self.write_update_count(old_gitdir, 1)
+
+ # Move it to new name, and remove top-level directory if we
+ # moved the .git subdirectory
+ os.rename(old_gitdir, new_repo)
+ if old_repo != old_gitdir:
+ shutil.rmtree(old_repo)
+
+ # Remove old-style backup repository
+ old_repo = os.path.join(dirname, 'git-pre-update')
+ if os.path.exists(old_repo):
+ shutil.rmtree(old_repo)
+
+ def prepare_working_repos(self, dirname):
+ # Determine which repository is active (has highest update
+ # count) and which we will create or replace
+ repos = []
+ for repo in [os.path.join(dirname, 'git-a'),
+ os.path.join(dirname, 'git-b')]:
+ tstamp = -1
+ count = 0
+ try:
+ count_name = os.path.join(repo, UPDATE_COUNT_NAME)
+ with open(count_name, 'r') as count_file:
+ tstamp = os.stat(count_file.fileno()).st_mtime
+ count = int(count_file.readline())
+ except (FileNotFoundError, ValueError):
+ pass
+ repos.append((count, tstamp, repo))
+ repos.sort()
+ temp_count, _, temp_repo = repos[0]
+ active_count, active_tstamp, active_repo = repos[1]
+
+ # Remove/rename temporary repository
+ if os.path.exists(temp_repo):
+ # If this was the result of a successful conversion, and
+ # multiple backups are enabled, rename it. We name it
+ # using the timestamp of the active repository, i.e. the
+ # time that this repository became inactive.
+ if temp_count > 0 and self.settings['keep-multiple-backups']:
+ time = datetime.fromtimestamp(active_tstamp) \
+ .strftime('%F-%T')
+ os.rename(temp_repo,
+ os.path.join(dirname, 'git-pre-update-' + time))
+ else:
+ shutil.rmtree(temp_repo)
- def backup_gitdir(self, name, gitdir, backupdir):
- dotgit = os.path.join(gitdir, '.git')
- if not os.path.exists(dotgit):
- dotgit = gitdir
+ if active_count == 0:
+ return temp_repo, None, 1
- self.progress('.. backing up %s git repository to %s' % (name, backupdir))
+ self.copy_gitdir(active_repo, temp_repo)
+ return temp_repo, active_repo, active_count + 1
- return self.copy_gitdir(dotgit, backupdir)
+ def write_update_count(self, gitdir, count):
+ count_name = os.path.join(gitdir, UPDATE_COUNT_NAME)
+ with open(count_name, 'w') as count_file:
+ count_file.write('%d\n' % count)
def copy_gitdir(self, source, dest):
if not os.path.exists(source):
return None
- # copy everything except the objects dir
- def ignoreobjects(dirname, filenames):
+ # copy everything except the objects dir and update count
+ def ignore_filter(dirname, filenames):
if dirname.endswith(source):
- return ['objects']
+ return ['objects', UPDATE_COUNT_NAME]
return []
- shutil.copytree(source, dest, ignore=ignoreobjects)
+ shutil.copytree(source, dest, ignore=ignore_filter)
# hardlink the objects
sourceobjects = os.path.join(source, 'objects')