summaryrefslogtreecommitdiff
path: root/rdiff-backup/rdiff_backup/restore.py
blob: c3a2f6c52c4af73df0d217ad813697607345e643 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

"""Read increment files and restore to original"""

from __future__ import generators
import tempfile, os
from log import Log
import Globals, Time, Rdiff, Hardlink, FilenameMapping, SetConnections, \
	   rorpiter, selection, destructive_stepping, rpath, lazy


# These should be set to selection.Select objects over the source and
# mirror directories respectively.  yield_rcds below calls the Select
# method on both of them, so they must be assigned before a restore is
# started (they are only None placeholders here).
_select_source = None
_select_mirror = None

class RestoreError(Exception):
	"""Raised when an increment cannot be applied during a restore"""

def Restore(inc_rpath, mirror, target, rest_time):
	"""Restore mirror plus the increments under inc_rpath to target

	This is the friendly front end to restore_recursive below: it
	wraps target in a DSRPath when it is not one already, collects
	the increment files that share inc_rpath's base, and builds the
	top level RestoreIncrementData from them.

	rest_time is the time, in seconds, to restore to.  It is first
	adjusted by get_rest_time.

	inc_rpath must not name an actual increment file.  It is the
	increment path with its suffixes removed, and so should have
	the same index as mirror.

	"""
	dsrp_class = destructive_stepping.DSRPath
	if not isinstance(target, dsrp_class):
		target = dsrp_class(None, target)

	mirror_time = get_mirror_time()
	rest_time = get_rest_time(rest_time, mirror_time)

	top_incs = get_inclist(inc_rpath)
	top_index = inc_rpath.index
	top_rid = RestoreIncrementData(top_index, inc_rpath, top_incs)
	top_rid.sortincseq(rest_time, mirror_time)

	check_hardlinks(rest_time)
	restore_recursive(top_index, mirror, top_rid, target,
					  rest_time, mirror_time)

def get_mirror_time():
	"""Return the time (in seconds) of the most recent mirror

	The time is taken from the first "current_mirror" increment
	found under Globals.rbdir.  Finding none is reported through
	Log.FatalError; finding more than one only produces a warning.

	"""
	marker_base = Globals.rbdir.append("current_mirror")
	markers = get_inclist(marker_base)
	marker_count = len(markers)
	if marker_count > 1:
		Log("Warning, two different dates for current mirror found", 2)
	elif not markers:
		Log.FatalError("Could not get time of current mirror")
	return Time.stringtotime(markers[0].getinctime())

def get_rest_time(old_rest_time, mirror_time):
	"""Snap old_rest_time back to the time of an actual backup

	The rest of the code simply ignores increments older than the
	restore time.  When the requested time falls between two
	backups, though, what was on the mirror side at that moment
	corresponds to the older of the two, so that is the time we
	really want to restore to.

	We assume every rdiff-backup session left an "increments"
	increment in the data directory.  The newest of those times (or
	mirror_time) that is not later than old_rest_time is returned.
	If old_rest_time predates all of them the oldest known time is
	returned, and if there are no such increments at all
	old_rest_time is returned unchanged.

	"""
	base_incs = get_inclist(Globals.rbdir.append("increments"))
	if not base_incs: return old_rest_time

	known_times = []
	for inc in base_incs:
		known_times.append(Time.stringtotime(inc.getinctime()))
	known_times.append(mirror_time)

	not_later = [t for t in known_times if t <= old_rest_time]
	if not not_later:
		# restore time older than oldest increment, just return that
		return min(known_times)
	return max(not_later)

def get_inclist(inc_rpath):
	"""Return list of the increment files whose base is inc_rpath

	Every entry in inc_rpath's parent directory is examined, and
	those which are increment files with the same base name as
	inc_rpath are returned as RPaths.  The list is empty if the
	parent directory does not exist.

	"""
	dirname, basename = inc_rpath.dirsplit()
	parent_dir = rpath.RPath(inc_rpath.conn, dirname, ())
	if not parent_dir.isdir(): return [] # inc directory not created yet

	# The candidates should carry an index like inc_rpath's when it
	# has one, and otherwise just be plain paths in the same directory.
	if inc_rpath.index:
		def make_candidate(filename):
			return rpath.RPath(inc_rpath.conn, inc_rpath.base,
							   inc_rpath.index[:-1] + (filename,))
	else:
		def make_candidate(filename):
			return rpath.RPath(inc_rpath.conn,
							   os.path.join(dirname, filename))

	matching = []
	for filename in parent_dir.listdir():
		candidate = make_candidate(filename)
		if not candidate.isincfile(): continue
		if candidate.getincbase_str() == basename:
			matching.append(candidate)
	return matching

def check_hardlinks(rest_time):
	"""Turn hard link support on or off for this restore

	Unless Globals.preserve_hardlinks is 0, Hardlink.retrieve_final
	is asked for the hard link data at rest_time.  If it returns
	true the preserve_hardlinks global is set to 1, and in every
	other case it is set to None.

	"""
	have_link_data = (Globals.preserve_hardlinks != 0 and
					  Hardlink.retrieve_final(rest_time))
	if not have_link_data:
		SetConnections.UpdateGlobal('preserve_hardlinks', None)
		return
	Log("Hard link information found, attempting to preserve "
		"hard links.", 5)
	SetConnections.UpdateGlobal('preserve_hardlinks', 1)

def restore_recursive(index, mirror, rid, target, time, mirror_time):
	"""Restore everything at and underneath mirror/rid to target

	rid is a RestoreIncrementData object whose inc_list has already
	been through sortincseq, and target is the DSRPath to restore
	to.

	target may well have a different index than mirror and rid,
	since a file with index, say, ('foo','bar') can be restored to
	a target whose path does not contain "foo/bar".

	The index argument is not consulted here; iteration starts
	from rid.index, which must equal mirror.index.

	"""
	assert isinstance(target, destructive_stepping.DSRPath)
	assert mirror.index == rid.index

	finalize = rorpiter.IterTreeReducer(
		rorpiter.DestructiveSteppingFinalizer, ())
	rcd_iter = yield_rcds(rid.index, mirror, rid, target,
						  time, mirror_time)
	for rcd in rcd_iter:
		rcd.RestoreFile()
		restored = rcd.target
		finalize(restored.index, restored)
	finalize.Finish()

def yield_rcds(index, mirrorrp, rid, target, rest_time, mirror_time):
	"""Iterate RestoreCombinedData objects for the given args and below

	rid is a RestoreIncrementData object (or false if there is no
	increment information) and mirrorrp the corresponding file in
	the mirror directory (or false if there is none).  target is
	an rpath where the created file should go.

	Note the reversal of roles: for selection purposes the mirror
	directory is treated as the source (_select_source), and we are
	copying onto what Select considers the source directory, so
	target is checked against _select_mirror.

	The result of selecting target decides what is yielded:
	0 - nothing at all
	1 - this rcd, followed by everything found underneath it
	2 - this rcd and what is underneath it, but only if at least
		one rcd is actually found underneath

	NOTE(review): a mirrorrp rejected by _select_source is replaced
	by None before the rcd is built, so with a false rid as well
	RestoreCombinedData will fail its assertion - confirm that the
	callers never produce this combination.

	"""
	select_result = _select_mirror.Select(target)
	if select_result == 0: return

	if mirrorrp and not _select_source.Select(mirrorrp):
		mirrorrp = None
	rcd = RestoreCombinedData(rid, mirrorrp, target)

	# Look inside if either the mirror file or the increment
	# directory says there is a directory here.
	look_inside = mirrorrp and mirrorrp.isdir()
	if not look_inside:
		look_inside = rid and rid.inc_rpath and rid.inc_rpath.isdir()
	if look_inside:
		sub_rcds = yield_sub_rcds(index, mirrorrp, rid,
								  target, rest_time, mirror_time)
	else: sub_rcds = None

	if select_result == 2:
		if not sub_rcds: return
		try: first = sub_rcds.next()
		except StopIteration: return # no tuples found inside, skip
		yield rcd
		yield first
	elif select_result == 1:
		yield rcd
		if not sub_rcds: return
	else: return

	for sub_rcd in sub_rcds: yield sub_rcd

def yield_sub_rcds(index, mirrorrp, rid, target, rest_time, mirror_time):
	"""Yield the rcds for everything inside the given directory

	Nothing is yielded unless check_dir_exists says that a
	directory belongs here.  Otherwise the files under mirrorrp
	are collated with the rids found under rid, and yield_rcds is
	run on each collated pair, using target extended by the last
	element of the pair's index as the new target.

	"""
	if not check_dir_exists(mirrorrp, rid): return

	collated = rorpiter.CollateIterators(
		yield_mirrorrps(mirrorrp),
		yield_rids(rid, rest_time, mirror_time))
	for pair in collated:
		sub_index = pair.index
		sub_mirrorrp, sub_rid = pair
		sub_target = target.append(sub_index[-1])
		for rcd in yield_rcds(sub_index, sub_mirrorrp, sub_rid,
							  sub_target, rest_time, mirror_time):
			yield rcd

def check_dir_exists(mirrorrp, rid):
	"""Return true if target should be a directory

	When rid has increments, the answer depends only on whether the
	last one in its inc_list is a "dir" increment.  Without
	increments the mirror file decides, and None is returned when
	there is no mirror file either.

	"""
	if rid and rid.inc_list:
		# Incs say dir if last (earliest) one is a dir increment
		earliest = rid.inc_list[-1]
		return earliest.getinctype() == "dir"
	if not mirrorrp: return None
	return mirrorrp.isdir() # if no incs, copy mirror

def yield_mirrorrps(mirrorrp):
	"""Yield the rpaths directly inside the directory mirrorrp

	Nothing is yielded if mirrorrp is false or not a directory.
	With quoting enabled the children are whatever
	selection.get_quoted_dir_children produces; otherwise they are
	made by appending each filename, in sorted order, to mirrorrp.

	"""
	if not mirrorrp or not mirrorrp.isdir(): return

	if Globals.quoting_enabled:
		for child in selection.get_quoted_dir_children(mirrorrp):
			yield child
		return

	names = mirrorrp.listdir()
	names.sort()
	for name in names: yield mirrorrp.append(name)

def yield_rids(rid, rest_time, mirror_time):
	"""Yield RestoreIncrementData objects for the contents of rid's dir

	Nothing is yielded unless rid has an inc_rpath which is a
	directory.  Otherwise each entry of that directory is filed
	under a basename: increment files (other than 'data' ones) are
	added to the inc_list of the rid for their base, and
	directories become the inc_rpath of the rid with their name.
	A rid which only has increment files keeps None as its
	inc_rpath.  All the rpaths involved correspond to files in the
	increment directory.

	The rids are yielded in sorted basename order, after
	sortincseq has been run on each of them.

	"""
	if not (rid and rid.inc_rpath and rid.inc_rpath.isdir()): return

	inc_dir = rid.inc_rpath
	parent_index = rid.index
	sub_rids = {} # dictionary of basenames:rids

	def get_sub_rid(basename):
		"""Return rid filed under basename, making an empty one if needed"""
		if basename not in sub_rids:
			sub_rids[basename] = RestoreIncrementData(
				parent_index + (basename,), None, [])
		return sub_rids[basename]

	filenames = inc_dir.listdir()
	if Globals.quoting_enabled:
		filenames = [FilenameMapping.unquote(fn) for fn in filenames]

	for filename in filenames:
		rp = inc_dir.append(filename)
		if Globals.quoting_enabled: rp.quote_path()
		if rp.isincfile() and rp.getinctype() != 'data':
			get_sub_rid(rp.getincbase_str()).inc_list.append(rp)
		elif rp.isdir():
			get_sub_rid(filename).inc_rpath = rp

	basenames = list(sub_rids.keys())
	basenames.sort()

	# sortincseq now to avoid descending .missing directories later
	for basename in basenames:
		sub_rid = sub_rids[basename]
		if sub_rid.inc_rpath or sub_rid.inc_list:
			sub_rid.sortincseq(rest_time, mirror_time)
			yield sub_rid


class RestoreIncrementData:
	"""Hold what the increments directory says about a single index

	This is only a container (index, inc_rpath and inc_list), used
	because it is easier to work with than an IndexedTuple.

	"""
	def __init__(self, index, inc_rpath, inc_list):
		"""Store the index, the increment rpath and the list of increments"""
		self.index = index
		self.inc_rpath = inc_rpath
		self.inc_list = inc_list

	def sortincseq(self, rest_time, mirror_time):
		"""Reduce self.inc_list to the increments needed, latest first

		If there are no increments, or rest_time is not earlier than
		mirror_time, inc_list is simply emptied.  Otherwise the
		increments not older than rest_time are taken in order of
		increasing time up to and including the first one that is
		not a diff, and that sequence is stored reversed.

		"""
		if not self.inc_list or rest_time >= mirror_time:
			self.inc_list = []
			return

		needed = []
		for inc in self.get_newer_incs(rest_time, mirror_time):
			needed.append(inc)
			# Only diff type increments require later versions
			if inc.getinctype() != "diff": break
		needed.reverse() # store in reversed order (latest first)
		self.inc_list = needed

	def get_newer_incs(self, rest_time, mirror_time):
		"""Return incs not older than rest_time, sorted by increasing time

		Increments older than rest_time are dropped: rest_time is
		assumed to be the exact time rdiff-backup was run, so the
		next oldest increment never needs to be considered.
		mirror_time is accepted but not used.

		"""
		timed = [(Time.stringtotime(inc.getinctime()), inc)
				 for inc in self.inc_list]
		recent = [pair for pair in timed if pair[0] >= rest_time]
		recent.sort()
		return [inc for (inc_time, inc) in recent]
				

class RestoreCombinedData:
	"""Join the increment and mirror information for a single index

	This is like RestoreIncrementData, but it also knows about the
	mirror file and the target, and can carry out the restore of
	its one file.

	"""
	def __init__(self, rid, mirror, target):
		"""Init - take values from rid, from mirror, or from both

		mirror and target are DSRPaths of the corresponding files in
		the mirror and target directory respectively.  rid is a
		RestoreIncrementData as defined above.  At least one of rid
		and mirror must be given, and if both are their indices must
		agree.

		"""
		self.target = target
		if rid:
			self.index, self.inc_rpath, self.inc_list = \
				rid.index, rid.inc_rpath, rid.inc_list
			self.mirror = mirror or None
			if self.mirror: assert mirror.index == self.index
		elif mirror:
			self.index, self.mirror = mirror.index, mirror
			self.inc_rpath, self.inc_list = None, []
		else: assert None, "neither rid nor mirror given"

	def RestoreFile(self):
		"""Restore this one file to self.target (not recursive)

		The mirror file is copied over first when there are no
		increments or when the first increment to apply is a diff,
		and then each increment in inc_list is applied in turn.
		Nothing is done if there is neither an increment nor an
		existing mirror file.

		"""
		if not self.inc_list:
			if not (self.mirror and self.mirror.lstat()):
				return # no increments were applicable
		self.log()

		if self.restore_hardlink(): return

		start_from_mirror = not self.inc_list
		if not start_from_mirror:
			start_from_mirror = self.inc_list[0].getinctype() == "diff"
		if start_from_mirror:
			assert self.mirror and self.mirror.lstat(), \
				   "No base to go with incs for %s" % self.target.path
			rpath.copy_with_attribs(self.mirror, self.target)

		for inc in self.inc_list: self.applyinc(inc, self.target)

	def log(self):
		"""Write a log entry describing the current restore action"""
		inc_paths = []
		for inc in self.inc_list: inc_paths.append(inc.path)
		inc_string = ','.join(inc_paths)
		mirror_path = self.mirror and self.mirror.path
		message = ("Restoring %s with increments %s to %s" %
				   (mirror_path, inc_string, self.target.path))
		Log(message, 5)

	def restore_hardlink(self):
		"""Hard link target and return true if hard linking appropriate

		The link is only attempted when Globals.preserve_hardlinks is
		set.  After a successful link the attributes are copied to
		the target from the last increment in inc_list, or from the
		mirror file if there is no such increment.  Returns 1 if the
		target was linked and None otherwise.

		"""
		if not Globals.preserve_hardlinks: return None
		if not Hardlink.restore_link(self.index, self.target): return None

		attrib_source = self.inc_list and self.inc_list[-1]
		if not attrib_source: attrib_source = self.mirror
		rpath.copy_attribs(attrib_source, self.target)
		return 1

	def applyinc(self, inc, target):
		"""Apply the increment rpath inc to the target rpath target

		What happens depends on the type of the increment:
		diff - target is patched; it has to exist already
		dir - target is made a directory unless it already is one
		missing - target is left completely alone
		snapshot - target is replaced by a copy of the increment

		Except in the missing case the attributes of inc are copied
		to target afterwards.  RestoreError is raised for an unknown
		increment type, for a diff without an existing target, and
		for a dir increment whose target exists as something else.

		"""
		Log("Applying increment %s to %s" % (inc.path, target.path), 6)
		inctype = inc.getinctype()
		if inctype == "missing": return

		if inctype == "diff":
			if not target.lstat():
				raise RestoreError("Bad increment sequence at " + inc.path)
			patcher = Rdiff.patch_action(
				target, inc, delta_compressed = inc.isinccompressed())
			patcher.execute()
		elif inctype == "dir":
			if target.isdir(): pass
			elif target.lstat():
				raise RestoreError("File %s already exists" % target.path)
			else: target.mkdir()
		elif inctype == "snapshot":
			if not inc.isinccompressed(): rpath.copy(inc, target)
			else: target.write_from_fileobj(inc.open("rb", compress = 1))
		else: raise RestoreError("Unknown inctype %s" % inctype)

		rpath.copy_attribs(inc, target)