summaryrefslogtreecommitdiff
path: root/rdiff-backup/rdiff_backup/librsync.py
blob: f3d7c01ffe3e451ddb563d2ca737e9e05b942a27 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# Copyright 2002 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

"""Provides a high-level interface to some librsync functions

This is a python wrapper around the lower-level _librsync module,
which is written in C.  The goal was to use C as little as possible...

"""

import types, array
import _librsync

# Chunk size, taken from the C module.  LikeFile requests this many
# bytes per read() of its input and buffers at least this much before
# each cycle(); SigGenerator.update() processes its buffer whenever it
# holds at least this many bytes.
blocksize = _librsync.RS_JOB_BLOCKSIZE

class librsyncError(Exception):
	"""Error raised for failures inside librsync (bad signature, etc.)

	Exceptions created by the C module (_librsync.librsyncError) are
	by default unPickleable, so this module catches them and raises
	this pure-Python class with the same message instead.  Fixing
	this inside _librsync is probably possible, but this scheme was
	easier.

	"""


class LikeFile:
	"""File-like object used by SigFile, DeltaFile, and PatchFile"""
	mode = "rb"

	# This will be replaced in subclasses by an object with
	# appropriate cycle() method
	maker = None

	def __init__(self, infile, need_seek = None):
		"""LikeFile initializer - zero buffers, set eofs off"""
		self.check_file(infile, need_seek)
		self.infile = infile
		self.closed = self.infile_closed = None
		self.inbuf = ""
		self.outbuf = array.array('c')
		self.eof = self.infile_eof = None

	def check_file(self, file, need_seek = None):
		"""Raise type error if file doesn't have necessary attributes"""
		if not hasattr(file, "read"):
			raise TypeError("Basis file must have a read() method")
		if not hasattr(file, "close"):
			raise TypeError("Basis file must have a close() method")
		if need_seek and not hasattr(file, "seek"):
			raise TypeError("Basis file must have a seek() method")

	def read(self, length = -1):
		"""Build up self.outbuf, return first length bytes"""
		if length == -1:
			while not self.eof: self._add_to_outbuf_once()
			real_len = len(self.outbuf)
		else:
			while not self.eof and len(self.outbuf) < length:
				self._add_to_outbuf_once()
			real_len = min(length, len(self.outbuf))
			
		return_val = self.outbuf[:real_len].tostring()
		del self.outbuf[:real_len]
		return return_val

	def _add_to_outbuf_once(self):
		"""Add one cycle's worth of output to self.outbuf"""
		if not self.infile_eof: self._add_to_inbuf()
		try: self.eof, len_inbuf_read, cycle_out = self.maker.cycle(self.inbuf)
		except _librsync.librsyncError, e: raise librsyncError(str(e))
		self.inbuf = self.inbuf[len_inbuf_read:]
		self.outbuf.fromstring(cycle_out)

	def _add_to_inbuf(self):
		"""Make sure len(self.inbuf) >= blocksize"""
		assert not self.infile_eof
		while len(self.inbuf) < blocksize:
			new_in = self.infile.read(blocksize)
 			if not new_in:
				self.infile_eof = 1
				self.infile_closeval = self.infile.close()
				self.infile_closed = 1
				break
			self.inbuf += new_in

	def close(self):
		"""Close infile and pass on infile close value"""
		self.closed = 1
		if self.infile_closed: return self.infile_closeval
		else: return self.infile.close()


class SigFile(LikeFile):
	"""File-like object which incrementally generates a librsync signature"""
	def __init__(self, infile, blocksize = _librsync.RS_DEFAULT_BLOCK_LEN):
		"""SigFile initializer - takes basis file

		basis file only needs to have read() and close() methods.  It
		will be closed when we come to the end of the signature.

		"""
		LikeFile.__init__(self, infile)
		try: self.maker = _librsync.new_sigmaker(blocksize)
		except _librsync.librsyncError, e: raise librsyncError(str(e))

class DeltaFile(LikeFile):
	"""File-like object which incrementally generates a librsync delta"""
	def __init__(self, signature, new_file):
		"""DeltaFile initializer - call with signature and new file

		Signature can either be a string or a file with read() and
		close() methods.  New_file also only needs to have read() and
		close() methods.  It will be closed when self is closed.

		"""
		LikeFile.__init__(self, new_file)
		if type(signature) is types.StringType: sig_string = signature
		else:
			self.check_file(signature)
			sig_string = signature.read()
			assert not signature.close()
		try: self.maker = _librsync.new_deltamaker(sig_string)
		except _librsync.librsyncError, e: raise librsyncError(str(e))


class PatchedFile(LikeFile):
	"""File-like object which applies a librsync delta incrementally"""
	def __init__(self, basis_file, delta_file):
		"""PatchedFile initializer - call with basis delta

		Here basis_file must be a true Python file, because we may
		need to seek() around in it a lot, and this is done in C.
		delta_file only needs read() and close() methods.

		"""
		LikeFile.__init__(self, delta_file)
		if hasattr(basis_file, 'file'):
			basis_file = basis_file.file
		if type(basis_file) is not types.FileType:
			raise TypeError("basis_file must be a (true) file")
		try: self.maker = _librsync.new_patchmaker(basis_file)
		except _librsync.librsyncError, e: raise librsyncError(str(e))		


class SigGenerator:
	"""Calculate signature.

	Input and output is same as SigFile, but the interface is like md5
	module, not filelike object

	"""
	def __init__(self, blocksize = _librsync.RS_DEFAULT_BLOCK_LEN):
		"""Return new signature instance"""
		try: self.sig_maker = _librsync.new_sigmaker(blocksize)
		except _librsync.librsyncError, e: raise librsyncError(str(e))
		self.gotsig = None
		self.buffer = ""
		self.sig_string = ""

	def update(self, buf):
		"""Add buf to data that signature will be calculated over"""
		if self.gotsig:
			raise librsyncError("SigGenerator already provided signature")
		self.buffer += buf
		while len(self.buffer) >= blocksize:
			if self.process_buffer():
				raise librsyncError("Premature EOF received from sig_maker")

	def process_buffer(self):
		"""Run self.buffer through sig_maker, add to self.sig_string"""
		try: eof, len_buf_read, cycle_out = self.sig_maker.cycle(self.buffer)
		except _librsync.librsyncError, e: raise librsyncError(str(e))
		self.buffer = self.buffer[len_buf_read:]
		self.sig_string += cycle_out
		return eof

	def getsig(self):
		"""Return signature over given data"""
		while not self.process_buffer(): pass # keep running until eof
		return self.sig_string