summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jacob Vosmaer <contact@jacobvosmaer.nl> 2015-12-08 15:08:22 +0100
committer Jacob Vosmaer <contact@jacobvosmaer.nl> 2015-12-08 15:08:22 +0100
commit f3ca92a062424e0cda2c077d9c30a4edbd6bf4c8 (patch)
tree 9d01dcfe14e9873a876de9005e1608bfece53b45
parent 6d2be0212c444c6a3d25ae6a3c75822fa1c8614f (diff)
download gitlab-ce-f3ca92a062424e0cda2c077d9c30a4edbd6bf4c8.tar.gz
Add 'resume' capability to parallel-rsync-repos
sync-all-repos
-rwxr-xr-x bin/parallel-rsync-repos 43
-rw-r--r-- doc/operations/moving_repositories.md 50
2 files changed, 72 insertions, 21 deletions
diff --git a/bin/parallel-rsync-repos b/bin/parallel-rsync-repos
index b777056c95f..21921148fa0 100755
--- a/bin/parallel-rsync-repos
+++ b/bin/parallel-rsync-repos
@@ -1,29 +1,31 @@
-#!/bin/sh
-# this script should run as the 'git' user, not root, because of mkdir
+#!/usr/bin/env bash
+# this script should run as the 'git' user, not root, because 'root' should not
+# own intermediate directories created by rsync.
#
# Example invocation:
# find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \
-# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
+# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories
#
# You can also rsync to a remote destination.
#
-# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
+# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories
#
# If you need to pass extra options to rsync, set the RSYNC variable
#
-# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest
+# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos transfer-success.log /src dest
#
-SRC=$1
-DEST=$2
+LOGFILE=$1
+SRC=$2
+DEST=$3
-if [ -z "$JOBS" ] ; then
- JOBS=10
+if [ -z "$LOGFILE" ] || [ -z "$SRC" ] || [ -z "$DEST" ] ; then
+ echo "Usage: $0 LOGFILE SRC DEST"
+ exit 1
fi
-if [ -z "$SRC" ] || [ -z "$DEST" ] ; then
- echo "Usage: $0 SRC DEST"
- exit 1
+if [ -z "$JOBS" ] ; then
+ JOBS=10
fi
if [ -z "$RSYNC" ] ; then
@@ -35,5 +37,18 @@ if ! cd $SRC ; then
exit 1
fi
-sed "s|$SRC|./|" |\
- parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. $DEST/{}/"
+rsyncjob() {
+ relative_dir="./${1#$SRC}"
+
+ if ! $RSYNC --delete --relative -a "$relative_dir" "$DEST" ; then
+ echo "rsync $1 failed"
+ return 1
+ fi
+
+ echo "$1" >> $LOGFILE
+}
+
+export LOGFILE SRC DEST RSYNC
+export -f rsyncjob
+
+parallel -j$JOBS --progress rsyncjob
diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md
index a89602b367f..39086b7a251 100644
--- a/doc/operations/moving_repositories.md
+++ b/doc/operations/moving_repositories.md
@@ -96,25 +96,59 @@ after switching to the new repository storage directory.
### Parallel rsync for all repositories known to GitLab
-This will sync repositories with 10 rsync processes at a time.
+This will sync repositories with 10 rsync processes at a time. We keep
+track of progress so that the transfer can be restarted if necessary.
+
+First we create a new directory, owned by 'git', to hold transfer
+logs. We assume the directory is empty before we start the transfer
+procedure, and that we are the only ones writing files in it.
```
# Omnibus
-sudo gitlab-rake gitlab:list_repos |\
- sudo -u git \
+sudo mkdir /var/opt/gitlab/transfer-logs
+sudo chown git:git /var/opt/gitlab/transfer-logs
+
+# Source
+sudo -u git -H mkdir /home/git/transfer-logs
+```
+
+We seed the process with a list of the directories we want to copy.
+
+```
+# Omnibus
+sudo -u git sh -c 'gitlab-rake gitlab:list_repos > /var/opt/gitlab/transfer-logs/all-repos-$(date +%s).txt'
+
+# Source
+cd /home/git/gitlab
+sudo -u git -H sh -c 'bundle exec rake gitlab:list_repos > /home/git/transfer-logs/all-repos-$(date +%s).txt'
+```
+
+Now we can start the transfer. The command below is idempotent, and
+the number of jobs done by GNU Parallel should converge to zero. If it
+does not, some repositories listed in all-repos-1234.txt may have been
+deleted/renamed before they could be copied.
+
+```
+# Omnibus
+sudo -u git sh -c '
+cat /var/opt/gitlab/transfer-logs/* | sort | uniq -u |\
/usr/bin/env JOBS=10 \
- /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
+ /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
+ /var/opt/gitlab/transfer-logs/success-$(date +%s).log \
/var/opt/gitlab/git-data/repositories \
/mnt/gitlab/repositories
+'
# Source
cd /home/git/gitlab
-sudo -u git -H bundle exec rake gitlab:list_repos |\
- sudo -u git -H \
+sudo -u git -H sh -c '
+cat /home/git/transfer-logs/* | sort | uniq -u |\
/usr/bin/env JOBS=10 \
bin/parallel-rsync-repos \
+ /home/git/transfer-logs/success-$(date +%s).log \
/home/git/repositories \
/mnt/gitlab/repositories
+'
```
### Parallel rsync only for repositories with recent activity
@@ -129,7 +163,8 @@ gitlab:list_repos' to only print repositories with recent activity.
sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
sudo -u git \
/usr/bin/env JOBS=10 \
- /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \
+ /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \
+ success-$(date +%s).log \
/var/opt/gitlab/git-data/repositories \
/mnt/gitlab/repositories
@@ -139,6 +174,7 @@ sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\
sudo -u git -H \
/usr/bin/env JOBS=10 \
bin/parallel-rsync-repos \
+ success-$(date +%s).log \
/home/git/repositories \
/mnt/gitlab/repositories
```