summary | refs | log | tree | commit | diff
path: root/bin
diff options
context:
space:
mode:
author: Christian Schwede <christian.schwede@enovance.com> 2014-02-11 09:13:26 +0000
committer: Christian Schwede <christian.schwede@enovance.com> 2014-02-11 09:13:26 +0000
commit: 9b3ec3705f39202c48bdd8cb0a9387a6fdb29f2e (patch)
tree: 6dd20a978b674554e169c223ddcbaa308dcdb388 /bin
parent: f13288ae320159547823c7b32ff9e76505ec0ae5 (diff)
download: python-swiftclient-9b3ec3705f39202c48bdd8cb0a9387a6fdb29f2e.tar.gz
Add option to skip downloading/uploading identical files
swift already has the upload option "--changed" to upload only changed files. This patch adds a similar feature, "--skip-identical", which compares the MD5 of the local file to the remote object's etag. When used with download, the MD5 hexdigest of each existing local file is sent in an "If-None-Match" header so that identical files are not downloaded again. When used with upload, the MD5 is compared to the remote etag using the already existing HEAD request.

Change-Id: I727b0456558c6a7742b2428c6d1c45c4bfaf66e9
Diffstat (limited to 'bin')
-rwxr-xr-x bin/swift 62
1 files changed, 55 insertions, 7 deletions
diff --git a/bin/swift b/bin/swift
index ab53a73..a33c041 100755
--- a/bin/swift
+++ b/bin/swift
@@ -21,7 +21,8 @@ from errno import EEXIST, ENOENT
from hashlib import md5
from optparse import OptionParser, SUPPRESS_HELP
from os import environ, listdir, makedirs, utime, _exit as os_exit
-from os.path import basename, dirname, getmtime, getsize, isdir, join
+from os.path import basename, dirname, getmtime, getsize, isdir, join, \
+ sep as os_path_sep
from random import shuffle
from sys import argv, exit, stderr, stdout
from time import sleep, time, gmtime, strftime
@@ -274,7 +275,7 @@ Optional arguments:
Adds a customized request header to the query, like
"Range" or "If-Match". This argument is repeatable.
Example --header "content-type:text/plain"
-
+ --skip-identical Skip downloading files that are identical on both sides
'''.strip("\n")
@@ -310,6 +311,10 @@ def st_download(parser, args, thread_manager):
'Adds a customized request header to the query, like "Range" or '
'"If-Match". This argument is repeatable. Example'
' --header "content-type:text/plain"')
+ parser.add_option(
+ '--skip-identical', action='store_true', dest='skip_identical',
+ default=False, help='Skip downloading files that are identical on '
+ 'both sides')
(options, args) = parse_args(parser, args)
args = args[1:]
if options.out_file == '-':
@@ -330,6 +335,23 @@ def st_download(parser, args, thread_manager):
container, obj, out_file = queue_arg
else:
raise Exception("Invalid queue_arg length of %s" % len(queue_arg))
+ path = options.yes_all and join(container, obj) or obj
+ path = path.lstrip(os_path_sep)
+ if options.skip_identical and out_file != '-':
+ filename = out_file if out_file else path
+ try:
+ fp = open(filename, 'rb')
+ except IOError:
+ pass
+ else:
+ with fp:
+ md5sum = md5()
+ while True:
+ data = fp.read(65536)
+ if not data:
+ break
+ md5sum.update(data)
+ req_headers['If-None-Match'] = md5sum.hexdigest()
try:
start_time = time()
headers, body = \
@@ -342,9 +364,6 @@ def st_download(parser, args, thread_manager):
else:
content_length = None
etag = headers.get('etag')
- path = options.yes_all and join(container, obj) or obj
- if path[:1] in ('/', '\\'):
- path = path[1:]
md5sum = None
make_dir = not options.no_download and out_file != "-"
if content_type.split(';', 1)[0] == 'text/directory':
@@ -409,6 +428,9 @@ def st_download(parser, args, thread_manager):
else:
thread_manager.print_msg('%s [%s]', path, time_str)
except ClientException as err:
+ if err.http_status == 304 and options.skip_identical:
+ thread_manager.print_msg("Skipped identical file '%s'", path)
+ return
if err.http_status != 404:
raise
thread_manager.error("Object '%s/%s' not found", container, obj)
@@ -762,7 +784,7 @@ def st_post(parser, args, thread_manager):
thread_manager.error('Usage: %s post %s\n%s', basename(argv[0]),
st_post_options, st_post_help)
-st_upload_options = '''[--changed] [--segment-size <size>]
+st_upload_options = '''[--changed] [--skip-identical] [--segment-size <size>]
[--segment-container <container>] [--leave-segments]
[--object-threads <thread>] [--segment-threads <threads>]
[--header <header>] [--use-slo]
@@ -781,6 +803,7 @@ Positional arguments:
Optional arguments:
--changed Only upload files that have changed since the last
upload
+ --skip-identical Skip uploading files that are identical on both sides
--segment-size <size> Upload files in segments no larger than <size> and
then create a "manifest" file that will download all
the segments as if it were the original file
@@ -816,6 +839,10 @@ def st_upload(parser, args, thread_manager):
default=False, help='Will only upload files that have changed since '
'the last upload')
parser.add_option(
+ '--skip-identical', action='store_true', dest='skip_identical',
+ default=False, help='Skip uploading files that are identical on '
+ 'both sides')
+ parser.add_option(
'-S', '--segment-size', dest='segment_size', help='Will '
'upload files in segments no larger than <size> and then create a '
'"manifest" file that will download all the segments as if it were '
@@ -922,11 +949,32 @@ def st_upload(parser, args, thread_manager):
old_manifest = None
old_slo_manifest_paths = []
new_slo_manifest_paths = set()
- if options.changed or not options.leave_segments:
+ if options.changed or options.skip_identical \
+ or not options.leave_segments:
+ if options.skip_identical:
+ checksum = None
+ try:
+ fp = open(path, 'rb')
+ except IOError:
+ pass
+ else:
+ with fp:
+ md5sum = md5()
+ while True:
+ data = fp.read(65536)
+ if not data:
+ break
+ md5sum.update(data)
+ checksum = md5sum.hexdigest()
try:
headers = conn.head_object(container, obj)
cl = int(headers.get('content-length'))
mt = headers.get('x-object-meta-mtime')
+ if (options.skip_identical and
+ checksum == headers.get('etag')):
+ thread_manager.print_msg(
+ "Skipped identical file '%s'", path)
+ return
if options.changed and cl == getsize(path) and \
mt == put_headers['x-object-meta-mtime']:
return