diff options
-rwxr-xr-x | morphlib/exts/kvm.write | 15 | ||||
-rwxr-xr-x | morphlib/exts/virtualbox-ssh.write | 19 | ||||
-rwxr-xr-x | morphlib/recv-hole | 159 | ||||
-rw-r--r-- | morphlib/util.py | 24 | ||||
-rwxr-xr-x | morphlib/xfer-hole | 132 |
5 files changed, 341 insertions, 8 deletions
diff --git a/morphlib/exts/kvm.write b/morphlib/exts/kvm.write index 94a55daa..16f188b5 100755 --- a/morphlib/exts/kvm.write +++ b/morphlib/exts/kvm.write @@ -90,9 +90,18 @@ class KvmPlusSshWriteExtension(morphlib.writeexts.WriteExtension): '''Transfer raw disk image to libvirt host.''' self.status(msg='Transferring disk image') - target = '%s:%s' % (ssh_host, vm_path) - with open(raw_disk, 'rb') as f: - cliapp.runcmd(['rsync', '-szS', raw_disk, target]) + + xfer_hole_path = morphlib.util.get_data_path('xfer-hole') + recv_hole = morphlib.util.get_data('recv-hole') + + ssh_remote_cmd = [ + 'sh', '-c', recv_hole, 'dummy-argv0', 'file', vm_path + ] + + cliapp.runcmd( + ['python', xfer_hole_path, raw_disk], + ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd), + stdout=None, stderr=None) def create_libvirt_guest(self, ssh_host, vm_name, vm_path, autostart): '''Create the libvirt virtual machine.''' diff --git a/morphlib/exts/virtualbox-ssh.write b/morphlib/exts/virtualbox-ssh.write index 42585f5e..39ea8f86 100755 --- a/morphlib/exts/virtualbox-ssh.write +++ b/morphlib/exts/virtualbox-ssh.write @@ -102,11 +102,20 @@ class VirtualBoxPlusSshWriteExtension(morphlib.writeexts.WriteExtension): '''Transfer raw disk image to VirtualBox host, and convert to VDI.''' self.status(msg='Transfer disk and convert to VDI') - with open(raw_disk, 'rb') as f: - cliapp.ssh_runcmd(ssh_host, - ['VBoxManage', 'convertfromraw', 'stdin', vdi_path, - str(os.path.getsize(raw_disk))], - stdin=f) + + st = os.lstat(raw_disk) + xfer_hole_path = morphlib.util.get_data_path('xfer-hole') + recv_hole = morphlib.util.get_data('recv-hole') + + ssh_remote_cmd = [ + 'sh', '-c', recv_hole, + 'dummy-argv0', 'vbox', vdi_path, str(st.st_size), + ] + + cliapp.runcmd( + ['python', xfer_hole_path, raw_disk], + ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd), + stdout=None, stderr=None) def virtualbox_version(self, ssh_host): 'Get the version number of the VirtualBox running on the remote 
host.' diff --git a/morphlib/recv-hole b/morphlib/recv-hole new file mode 100755 index 00000000..d6504bf6 --- /dev/null +++ b/morphlib/recv-hole @@ -0,0 +1,159 @@ +#!/bin/sh +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# =*= License: GPL-2 =*= + + +# Receive a data stream describing a sparse file, and reproduce it, +# either to a named file or stdout. +# +# The data stream is simple: it's a sequence of DATA or HOLE records: +# +# DATA +# 123 +# <123 bytes of binary data, NOT including newline at the end> +# +# HOLE +# 123 +# +# This shell script can be executed over ssh (given to ssh as an argument, +# with suitable escaping) on a different computer. This allows a large +# sparse file (e.g., disk image) to be transferred quickly. +# +# This script should be called in one of the following ways: +# +# recv-hole file FILENAME +# recv-hole vbox FILENAME DISKSIZE +# +# In both cases, FILENAME is the pathname of the disk image on the +# receiving end. DISKSIZE is the size of the disk image in bytes. The +# first form is used when transferring a disk image to become an +# identical file on the receiving end. +# +# The second form is used when the disk image should be converted for +# use by VirtualBox. 
In this case, we want to avoid writing a +# temporary file on disk, and then calling the VirtualBox VBoxManage +# tool to do the conversion, since that would involve large amounts of +# unnecessary I/O and disk usage. Instead we pipe the file directly to +# VBoxManage, avoiding those issues. The piping is done here in this +# script, instead of in the caller, to make it easier to run things +# over ssh. +# +# However, since it's not possible to seek in a Unix pipe, we have to +# explicitly write the zeroes into the pipe. This is not +# super-efficient, but the way to avoid that would be to avoid sending +# a sparse file, and do the conversion to a VDI on the sending end. +# That is out of scope for xfer-hole and recv-hole. + + +set -eu + + +die() +{ + echo "$@" 1>&2 + exit 1 +} + + +recv_hole_to_file() +{ + local n + + read n + truncate --size "+$n" "$1" +} + + +recv_data_to_file() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" + xfer_data_to_stdout 1 "$extra" >> "$1" +} + + +recv_hole_to_stdout() +{ + local n + read n + (echo "$n"; cat /dev/zero) | recv_data_to_stdout +} + + +recv_data_to_stdout() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" + xfer_data_to_stdout 1 "$extra" +} + + +xfer_data_to_stdout() +{ + local log="$(mktemp)" + if ! 
dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log" + then + cat "$log" 1>&2 + rm -f "$log" + exit 1 + else + rm -f "$log" + fi +} + + +type="$1" +case "$type" in + file) + output="$2" + truncate --size=0 "$output" + while read what + do + case "$what" in + DATA) recv_data_to_file "$output" ;; + HOLE) recv_hole_to_file "$output" ;; + *) die "Unknown instruction: $what" ;; + esac + done + ;; + vbox) + output="$2" + disk_size="$3" + while read what + do + case "$what" in + DATA) recv_data_to_stdout ;; + HOLE) recv_hole_to_stdout ;; + *) die "Unknown instruction: $what" ;; + esac + done | + VBoxManage convertfromraw stdin "$output" "$disk_size" + ;; +esac diff --git a/morphlib/util.py b/morphlib/util.py index 0c551296..36ab4e21 100644 --- a/morphlib/util.py +++ b/morphlib/util.py @@ -463,3 +463,27 @@ def iter_trickle(iterable, limit): if len(buf) == 0: break yield buf + + +def get_data_path(relative_path): # pragma: no cover + '''Return path to a data file in the morphlib Python package. + + ``relative_path`` is the name of the data file, relative to the + location in morphlib where the data files are. + + ''' + + morphlib_dir = os.path.dirname(morphlib.__file__) + return os.path.join(morphlib_dir, relative_path) + + +def get_data(relative_path): # pragma: no cover + '''Return contents of a data file from the morphlib Python package. + + ``relative_path`` is the name of the data file, relative to the + location in morphlib where the data files are. + + ''' + + with open(get_data_path(relative_path)) as f: + return f.read() diff --git a/morphlib/xfer-hole b/morphlib/xfer-hole new file mode 100755 index 00000000..0d4cee7a --- /dev/null +++ b/morphlib/xfer-hole @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# +# Send a sparse file more space-efficiently. +# See recv-hole for a description of the protocol. +# +# Note that xfer-hole requires a version of Linux with support for +# SEEK_DATA and SEEK_HOLE. 
+# +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# =*= License: GPL-2 =*= + + + +import errno +import os +import sys + + +SEEK_DATA = 3 +SEEK_HOLE = 4 + + +filename = sys.argv[1] +fd = os.open(filename, os.O_RDONLY) +pos = 0 + + +DATA = 'data' +HOLE = 'hole' +EOF = 'eof' + + +def safe_lseek(fd, pos, whence): + try: + return os.lseek(fd, pos, whence) + except OSError as e: + if e.errno == errno.ENXIO: + return -1 + raise + + +def current_data_or_pos(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + return DATA, pos + elif pos == next_hole: + return HOLE, pos + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def next_data_or_hole(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + # We are at data. + if next_hole == -1 or next_hole == length: + return EOF, length + else: + return HOLE, next_hole + elif pos == next_hole: + # We are at a hole. 
+ if next_data == -1 or next_data == length: + return EOF, length + else: + return DATA, next_data + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def find_data_and_holes(fd): + pos = safe_lseek(fd, 0, os.SEEK_CUR) + + kind, pos = current_data_or_pos(fd, pos) + while kind != EOF: + yield kind, pos + kind, pos = next_data_or_hole(fd, pos) + yield kind, pos + + +def make_xfer_instructions(fd): + prev_kind = None + prev_pos = None + for kind, pos in find_data_and_holes(fd): + if prev_kind == DATA: + yield (DATA, prev_pos, pos) + elif prev_kind == HOLE: + yield (HOLE, prev_pos, pos) + prev_kind = kind + prev_pos = pos + + +def copy_slice_from_file(to, fd, start, end): + safe_lseek(fd, start, os.SEEK_SET) + data = os.read(fd, end - start) + to.write(data) + + +for kind, start, end in make_xfer_instructions(fd): + if kind == HOLE: + sys.stdout.write('HOLE\n%d\n' % (end - start)) + elif kind == DATA: + sys.stdout.write('DATA\n%d\n' % (end - start)) + copy_slice_from_file(sys.stdout, fd, start, end) |