From 8c37a5bdfdd46d5cfad6e9d67925ddef9ca382bf Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 21 Mar 2002 07:22:43 +0000 Subject: First checkin git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup/trunk@2 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109 --- rdiff-backup/CHANGELOG | 416 +++++++++++++ rdiff-backup/COPYING | 340 +++++++++++ rdiff-backup/FAQ.html | 122 ++++ rdiff-backup/README | 16 + rdiff-backup/TODO | 8 + rdiff-backup/dist/makedist | 66 ++ rdiff-backup/dist/makerpm | 34 ++ rdiff-backup/dist/makeweb | 27 + rdiff-backup/dist/rdiff-backup.spec | 47 ++ rdiff-backup/misc/find2dirs | 30 + rdiff-backup/misc/init_files.py | 69 +++ rdiff-backup/misc/myrm | 16 + rdiff-backup/misc/remove-comments.py | 32 + rdiff-backup/rdiff-backup.1 | 283 +++++++++ rdiff-backup/rdiff_backup/connection.py | 467 ++++++++++++++ rdiff-backup/rdiff_backup/destructive_stepping.py | 250 ++++++++ rdiff-backup/rdiff_backup/filelist.py | 106 ++++ rdiff-backup/rdiff_backup/header.py | 18 + rdiff-backup/rdiff_backup/highlevel.py | 288 +++++++++ rdiff-backup/rdiff_backup/increment.py | 180 ++++++ rdiff-backup/rdiff_backup/iterfile.py | 235 ++++++++ rdiff-backup/rdiff_backup/lazy.py | 343 +++++++++++ rdiff-backup/rdiff_backup/log.py | 142 +++++ rdiff-backup/rdiff_backup/manage.py | 99 +++ rdiff-backup/rdiff_backup/restore.py | 158 +++++ rdiff-backup/rdiff_backup/rlist.py | 240 ++++++++ rdiff-backup/rdiff_backup/robust.py | 537 +++++++++++++++++ rdiff-backup/rdiff_backup/rorpiter.py | 248 ++++++++ rdiff-backup/rdiff_backup/rpath.py | 704 ++++++++++++++++++++++ rdiff-backup/rdiff_backup/static.py | 30 + rdiff-backup/src/Make | 37 ++ rdiff-backup/src/connection.py | 467 ++++++++++++++ rdiff-backup/src/destructive_stepping.py | 250 ++++++++ rdiff-backup/src/filelist.py | 106 ++++ rdiff-backup/src/globals.py | 172 ++++++ rdiff-backup/src/header.py | 18 + rdiff-backup/src/highlevel.py | 288 +++++++++ rdiff-backup/src/increment.py | 180 ++++++ rdiff-backup/src/iterfile.py | 235 ++++++++ rdiff-backup/src/lazy.py | 343 +++++++++++ rdiff-backup/src/log.py | 142 +++++ rdiff-backup/src/main.py | 401 ++++++++++++ rdiff-backup/src/manage.py | 99 +++ rdiff-backup/src/rdiff.py | 175 ++++++ rdiff-backup/src/restore.py | 158 +++++ rdiff-backup/src/rlist.py | 240 ++++++++ rdiff-backup/src/robust.py | 537 +++++++++++++++++ rdiff-backup/src/rorpiter.py | 248 ++++++++ rdiff-backup/src/rpath.py | 704 ++++++++++++++++++++++ rdiff-backup/src/setconnections.py | 205 +++++++ rdiff-backup/src/static.py | 30 + rdiff-backup/src/ttime.py | 129 ++++ rdiff-backup/testing/chdir-wrapper | 15 + rdiff-backup/testing/commontest.py | 19 + rdiff-backup/testing/connectiontest.py | 201 ++++++ rdiff-backup/testing/destructive_steppingtest.py | 72 +++ rdiff-backup/testing/filelisttest.py | 35 ++ rdiff-backup/testing/finaltest.py | 150 +++++ rdiff-backup/testing/find-max-ram.py | 60 ++ rdiff-backup/testing/highleveltest.py | 75 +++ rdiff-backup/testing/incrementtest.py | 100 +++ rdiff-backup/testing/iterfiletest.py | 27 + rdiff-backup/testing/lazytest.py | 326 ++++++++++ rdiff-backup/testing/rdifftest.py | 127 ++++ rdiff-backup/testing/regressiontest.py | 410 +++++++++++++ rdiff-backup/testing/restoretest.py | 47 ++ rdiff-backup/testing/rlisttest.py | 98 +++ rdiff-backup/testing/robusttest.py | 86 +++ rdiff-backup/testing/roottest.py | 165 +++++ rdiff-backup/testing/rorpitertest.py | 105 ++++ rdiff-backup/testing/rpathtest.py | 337 +++++++++++ rdiff-backup/testing/server.py | 12 + rdiff-backup/testing/setconnectionstest.py | 26 + rdiff-backup/testing/statictest.py | 
63 ++ rdiff-backup/testing/testall.py | 26 + rdiff-backup/testing/timetest.py | 71 +++ 76 files changed, 13368 insertions(+) create mode 100644 rdiff-backup/CHANGELOG create mode 100644 rdiff-backup/COPYING create mode 100644 rdiff-backup/FAQ.html create mode 100644 rdiff-backup/README create mode 100644 rdiff-backup/TODO create mode 100755 rdiff-backup/dist/makedist create mode 100755 rdiff-backup/dist/makerpm create mode 100755 rdiff-backup/dist/makeweb create mode 100644 rdiff-backup/dist/rdiff-backup.spec create mode 100755 rdiff-backup/misc/find2dirs create mode 100755 rdiff-backup/misc/init_files.py create mode 100755 rdiff-backup/misc/myrm create mode 100644 rdiff-backup/misc/remove-comments.py create mode 100644 rdiff-backup/rdiff-backup.1 create mode 100644 rdiff-backup/rdiff_backup/connection.py create mode 100644 rdiff-backup/rdiff_backup/destructive_stepping.py create mode 100644 rdiff-backup/rdiff_backup/filelist.py create mode 100644 rdiff-backup/rdiff_backup/header.py create mode 100644 rdiff-backup/rdiff_backup/highlevel.py create mode 100644 rdiff-backup/rdiff_backup/increment.py create mode 100644 rdiff-backup/rdiff_backup/iterfile.py create mode 100644 rdiff-backup/rdiff_backup/lazy.py create mode 100644 rdiff-backup/rdiff_backup/log.py create mode 100644 rdiff-backup/rdiff_backup/manage.py create mode 100644 rdiff-backup/rdiff_backup/restore.py create mode 100644 rdiff-backup/rdiff_backup/rlist.py create mode 100644 rdiff-backup/rdiff_backup/robust.py create mode 100644 rdiff-backup/rdiff_backup/rorpiter.py create mode 100644 rdiff-backup/rdiff_backup/rpath.py create mode 100644 rdiff-backup/rdiff_backup/static.py create mode 100755 rdiff-backup/src/Make create mode 100644 rdiff-backup/src/connection.py create mode 100644 rdiff-backup/src/destructive_stepping.py create mode 100644 rdiff-backup/src/filelist.py create mode 100644 rdiff-backup/src/globals.py create mode 100644 rdiff-backup/src/header.py create mode 100644 rdiff-backup/src/highlevel.py create mode 100644 rdiff-backup/src/increment.py create mode 100644 rdiff-backup/src/iterfile.py create mode 100644 rdiff-backup/src/lazy.py create mode 100644 rdiff-backup/src/log.py create mode 100755 rdiff-backup/src/main.py create mode 100644 rdiff-backup/src/manage.py create mode 100644 rdiff-backup/src/rdiff.py create mode 100644 rdiff-backup/src/restore.py create mode 100644 rdiff-backup/src/rlist.py create mode 100644 rdiff-backup/src/robust.py create mode 100644 rdiff-backup/src/rorpiter.py create mode 100644 rdiff-backup/src/rpath.py create mode 100644 rdiff-backup/src/setconnections.py create mode 100644 rdiff-backup/src/static.py create mode 100644 rdiff-backup/src/ttime.py create mode 100755 rdiff-backup/testing/chdir-wrapper create mode 100644 rdiff-backup/testing/commontest.py create mode 100644 rdiff-backup/testing/connectiontest.py create mode 100644 rdiff-backup/testing/destructive_steppingtest.py create mode 100644 rdiff-backup/testing/filelisttest.py create mode 100644 rdiff-backup/testing/finaltest.py create mode 100755 rdiff-backup/testing/find-max-ram.py create mode 100644 rdiff-backup/testing/highleveltest.py create mode 100644 rdiff-backup/testing/incrementtest.py create mode 100644 rdiff-backup/testing/iterfiletest.py create mode 100644 rdiff-backup/testing/lazytest.py create mode 100644 rdiff-backup/testing/rdifftest.py create mode 100644 rdiff-backup/testing/regressiontest.py create mode 100644 rdiff-backup/testing/restoretest.py create mode 100644 rdiff-backup/testing/rlisttest.py create mode 
100644 rdiff-backup/testing/robusttest.py create mode 100644 rdiff-backup/testing/roottest.py create mode 100644 rdiff-backup/testing/rorpitertest.py create mode 100644 rdiff-backup/testing/rpathtest.py create mode 100755 rdiff-backup/testing/server.py create mode 100644 rdiff-backup/testing/setconnectionstest.py create mode 100644 rdiff-backup/testing/statictest.py create mode 100644 rdiff-backup/testing/testall.py create mode 100644 rdiff-backup/testing/timetest.py diff --git a/rdiff-backup/CHANGELOG b/rdiff-backup/CHANGELOG new file mode 100644 index 0000000..755b590 --- /dev/null +++ b/rdiff-backup/CHANGELOG @@ -0,0 +1,416 @@ +New in v0.6.0 (2002/03/14) +-------------------------- + +Fixed some assorted manual "bugs". + +Fixed an endless loop bug in a certain error recovery situation reported by +Nick Duffek, and slightly changed around some other error correction +code. + +Switching to a new version numbering system: versions x.2n+1.x are +unstable, versions x.2n.x are supposed to be more stable. + + +New in v0.5.4 (2002/03/06) +-------------------------- + +Fixed a bug present since 0.5.0 wherein rdiff-backup would make +snapshots instead of diffs when regular files change. + +May have fixed a race condition involving rdiff execution. + + +New in v0.5.3 (2002/03/03) +-------------------------- + +It turns out the previous version broke device handling. Sorry about +that.. + + +New in v0.5.2 (2002/03/02) +-------------------------- + +Fixed bugs which made rdiff-backup try to preserve mod times when it +wasn't necessary, and exit instead of warning when it wasn't being run +as root and found a file it didn't own. (Reported by Alberto +Accomazzi.) + +Added some more error checking; maybe this will fix a bug reported by +John Goerzen wherein rdiff-backup can crash if a file is deleted while +rdiff-backup is processing it. + +Changed locations of some of the temp files; filenames will be +determined by the tempfile module. + + +New in v0.5.1 (2002/02/22) +-------------------------- + +When establishing a connection, print a warning if the server version +is different from the client version. + +When rdiff returns error value 256, tell the user that it is probably because +rdiff couldn't be found in the path. + +Fixed a serious bug that can apparently cause remote backups to fail +(reported by John Goerzen). + +May have fixed a bug that causes recovery from certain errors to fail. + + +New in v0.5.0 (2002/02/17) +-------------------------- + +Now every so often (default is 20 seconds, the --checkpoint-interval +option controls it) rdiff-backup checkpoints by dumping its state to +temporary files in the rdiff-backup-data directory. If rdiff-backup +is rerun with the same destination directory, it can either try to +resume the previous backup or at least clean things up so the archive +is consistent and accurate. + +Added new options --resume, --no-resume, and --resume-interval, which +control when rdiff-backup tries to resume a previous failed backup. + +Fixed a bug with the --exclude-device-files option which caused the +option to be ignored when the source directory was remote. + +By default, if rdiff-backup encounters a certain kind of IOError +(currently types 26 and 5) while trying to access a file, it logs the +error, skips the file, and tries to continue. + +If settings requiring an integer argument (like -v or +--checkpoint-interval) are given a bad (non-integer) argument, fail +with a better explanation. + +Fixed an annoying logging bug.
Now no matter which computer a logging +message originates on, it should be routed to the process which is +writing to the logging file, and written correctly. However, logging +messages about network traffic will not be routed, as this will +generate more traffic and lead to an infinite regress. + +When calling rdiff, uses popen2.Popen3 and os.spawnvp instead of +os.popen and os.system. This should make rdiff-backup more secure. +Thanks to Jamie Heilman for the suggestion. + +Instead of calling the external shell command 'stat', rdiff-backup +uses os.lstat().st_rdev to determine a device file's major and minor +numbers. The new method should be more portable. Thanks to Jamie +Heilman for the suggestion. + +All the file operations were examined and tweaked to try to +minimize/eliminate the chance of leaving the backup directory in an +inconsistent state. + +Upon catchable kinds of errors, try to checkpoint before exiting so +later rdiff-backup processes have more information to work with. + +At the suggestion of Jason Piterak, added a --windows-time-format +option so rdiff-backup will (perhaps) work under MS windows NT. + + +New in v0.4.4 (2002/01/09) +-------------------------- + +Applied Berkan Eskikaya's "xmas patch" (I was travelling and didn't +have a chance on Christmas). He fixed important bugs in the +--terminal-verbosity and --remove-older-than options. + +Added an --exclude-device-files option, which makes rdiff-backup skip +any device files in the same way it skips files selected with the +--exclude option. + + +New in v0.4.3 (2001/12/17) +-------------------------- + +Plugged another memory hole. At first I thought it might have been +python's fault, but it was all me. If rdiff-backup uses more than a +few megabytes of memory, tell me because it is probably another memory +hole.. + +rdiff-backup is now a bit more careful about deleting temporary files +it creates when it is done with them. + +Changed the rpm spec a little. The enclosed man page is gzipped and +the package file is GPG signed (it can be checked with, for example, +"rpm --checksig -v rdiff-backup-0.4.3-1.noarch.rpm"). + +rdiff-backup no longer checks the mtimes or atimes of device files. +Use of these times was inconsistent (sometimes writing to device files +updates their times, sometimes not) and leads to unnecessary backing +up of files. + + +New in v0.4.2 (2001/11/19) +-------------------------- + +Significant speed increases (maybe 20% for local sessions) when +dealing with directories that do not need to be updated much. + +Fixed memory leak. rdiff-backup should now run in almost constant +memory (about 6MB on my system). + +Enabled buffering of object transfers, so remote sessions can be +50-100%+ faster. + +rdiff-backup now thinks it is running as root if the destination +connection is root. Thus rdiff-backup will preserve ownership even if +it is not running as root on the source end. + +If you abort rdiff-backup or it fails for some reason, it is now more +robust about recovering the next time it is run (before it could fail +in ways which made subsequent sessions fail also). However, it is +still not a good idea to abort, as individual files could be in the +process of being written and could get corrupted. + +If rdiff-backup encounters an unreadable file (or, if +--change-source-perms is given, a file whose permissions it cannot +change), it will log a warning, ignore the file, and continue, instead +of exiting with an error. 
+ + +New in v0.4.1 (2001/11/9) +------------------------- + +Now either the source, or the target, or both can be remote. To make +this less confusing, now rdiff-backup supports host::file notation. +So it is legal to run: + +rdiff-backup bill@host1.net::source_file jones@host2.net::target + +Also, the test suites have been improved and found a number of bugs +(which were then fixed). + + +New in v0.4.0 (2001/11/4) +------------------------- + +Much of the rdiff-backup internals were rewritten. The result should +be better performance when operating remotely over a pipe with +significant latency. Also the code dealing with changing permissions +is much cleaner, and should generalize later to similar jobs (for +instance preserving atimes). + +Listing and deleting increments and restoring should work remotely +now. In earlier versions a file or directory had to be restored +locally and then copied over to its final destination. + +At the request of the FSF, a copy of the GPL has been included in the +packaged distributions. It is in the file "COPYING". + + +New in v0.3.4 (2001/10/31) +-------------------------- + +A change in python from the 2.2a series to the 2.2b series made remote +backup on version 0.3.3 stop working; a small change fixes it. (Thanks +to Berkan Eskikaya for telling me about this.) + +Listed some missing features/bugs on the manual page. + + +New in v0.3.3 (2001/10/16) +-------------------------- + +Changed the quoting system yet again after learning that the old system +was not very portable between shells (thanks Hans). + + +New in v0.3.2 (2001/10/9) +------------------------- + +Added --list-increments and --remove-older-than commands. +--list-increments will just tell you what increments you have and +their dates. This isn't anything you couldn't get from "ls", but it +may be formatted more nicely. The --remove-older-than command is used +to delete older increments that you don't want, or don't have space +for. + +Also, on some systems ssh was adding a spurious "Broken pipe" message, +even though everything went fine. Maybe this version will prevent +this confusing message. + + +New in v0.3.1 (2001/9/11) +------------------------- + +Fix for a stupid bug - when running remotely as users with different +uids, rdiff-backup now doesn't check the uid/gid. Before it kept +thinking that the files needed to be updated because they didn't have +the right ownership. This shouldn't have resulted in any data loss - +just some unnecessary .rdiff files. (Thanks to Michael Friedlander +for finding this.) + +Added a check to make sure that rdiff exits successfully. + + +New in v0.3.0 (2001/9/9 - Billennium edition) +--------------------------------------------- + +rdiff-backup has been almost completely rewritten for v0.3.0, as it +was for v0.1.0. The main problem with versions 0.2.x was that the +networking code was added to the not-remote-capable v0.1, and the +result was unwieldy and prone to bugs when operating over a pipe. + +There are some new features: + +- Hopefully very few bugs, at least in basic file handling. + rdiff-backup has an extensive testing suite now, so it should be + much more reliable. + +- Complete support for reading and writing from and to files and + directories that lack permissions, by temporarily changing them, and + then changing them back later. (See for instance the + --change-source-perms switch.) As I found out there is a lot to + this, so much that I'm not sure in retrospect I should have + bothered. :-) + +- New, more standard format for increment files.
See + http://www.w3.org/TR/NOTE-datetime for the time standard. The old + format, besides being less standard, didn't take timezones into + account. + +- In the initial mirroring, rdiff-backup only copies the files that it + needs to, so it is much quicker when you almost have an initial + mirror already. You can even use the --mirror-only switch and make + rdiff-backup into a slow version of rsync. + +- Terminal and file verbosity levels can be selected separately. So + if you like a lot of detail in your backup.log/restore.log but not much on + your terminal, or vice-versa, you can set them at different numbers. + +- New --test-server option so if something goes wrong you can see if + it is because the server on the other side isn't being initialized + properly. + +- New --no-rdiff-copy option, which disables using rdiff to move files + across a connection (it will still be used to make increment files + however). If the bottleneck is not bandwidth but local disks/CPUs, + this option should speed things up. + +There are, however, a few negatives: + +- rdiff-backup now requires Python version 2.2 or later. Sorry for + the inconvenience but I use the new features a lot. + +- It may be slightly slower overall than versions 0.2.x - the remote + code is cleaner, but probably has higher overhead. At least on my + computer, rdiff-backup is still quicker than rsync for local + mirroring of large files, but for remote mirroring, rsync will + usually be much quicker, because it uses a fairly low-overhead + pipelining protocol. + +- Any old increments are incompatible because they use a different + date/time standard. If this is a big deal, try mailing me. A + converter shouldn't be very difficult to write, but I didn't want to + take the time unless someone really wanted it. + + +New in v0.2.8 (2001/9/4) +------------------------- + +Fixed two stupid bugs that would cause rdiff-backup to exit with an +exception. (I can't believe they were in there.) + + +New in v0.2.7 (2001/8/29) +------------------------- + +Added new long options --backup-mode and --verbosity which are +equivalent to -b and -v. + +rdiff-backup should be a little more resistant to the filesystem it is +backing up changing underneath it (although it is not set up to handle +this in general). Thanks Alberto Accomazzi +for these suggestions. + + +New in v0.2.6 (2001/8/27) +------------------------- + +Fixed a bug where, for non-root users, rdiff-backup could, in the +process of mirroring an unwritable directory, make the copy +unwritable and then fail. Now rdiff-backup goes through and makes +what it needs to be readable and writable, and then changes things +back at the end. (Another one found by Jeb Campbell!) + + +New in v0.2.5 (2001/8/26) +------------------------- + +Added better error reporting for when the server throws an exception. + +Fixed a bug so that backed-up setuid files will also be setuid. + +Now rdiff-backup thinks it's running as root only if both client and +server are running as root (Thanks to Jeb Campbell for finding these +previous two bugs). + +Fixed a miscellaneous Path bug that could occur in remote operation. + + +New in v0.2.4 (2001/8/25) +------------------------- + +Added more logging options that may help others track down a mysterious +bug. + + +New in v0.2.3 (2001/8/24) +------------------------- + +Fixed a typing bug that caused an Assertion Error in remote operation, +thanks again to Jeb Campbell for finding it.
+ + +New in v0.2.2 (2001/8/24) +------------------------- + +Fixed bug in remote creation of special files and symlinks (thanks to +Jeb Campbell for finding it). + +Fixed another error report. + + +New in v0.2.1 (2001/8/7) +------------------------ + +Now if rdiff-backup isn't running as root, it doesn't try to change +file ownership. + +Fixed an error report. + +Stopped flushing an open pipe to fix a race condition on IRIX. + + +New in v0.2 (2001/8/3) +---------------------- + +rdiff-backup can now operate in a bandwidth efficient manner (a la +rsync) using a pipe setup with, for instance, ssh. + +I was too hasty with the last bug fix and didn't deal with all +filenames properly. Maybe this one will work. + + +New in v0.1.1 (2001/8/2) +------------------------- + +Bug fix: Filenames that may contain spaces, backslashes, and other +special characters are quoted now and should be handled correctly. + + +New in v0.1 (2001/7/15) +---------------------- + +Large portion (majority?) of rdiff-backup was rewritten for v0.1. New +version highlights: + + - No new features! + - No speed improvements! It may even be slower... + - No bug fixes! (ok maybe a few) + +However, the new version is much cleaner and better documented. This +version should have fewer bugs, and it should be easier to fix any +future bugs. diff --git a/rdiff-backup/COPYING b/rdiff-backup/COPYING new file mode 100644 index 0000000..5b6e7c6 --- /dev/null +++ b/rdiff-backup/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year>  <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/rdiff-backup/FAQ.html b/rdiff-backup/FAQ.html new file mode 100644 index 0000000..774da4a --- /dev/null +++ b/rdiff-backup/FAQ.html @@ -0,0 +1,122 @@ + + + + rdiff-backup FAQ + + + +

rdiff-backup FAQ

+ +

Table of contents

+ +
  1. When I try to run rdiff-backup it says +"ImportError: No module named __future__" or "SyntaxError: invalid +syntax". What's happening?
  2. What do the different verbosity levels mean?
  3. Does rdiff-backup run under Windows?
+ +

FAQ

+ +
    + + +
  1. When I try to run rdiff-backup it says "ImportError: No +module named __future__" or "SyntaxError: invalid syntax". What's +happening? + +

    rdiff-backup versions 0.2.x require Python version 2.1 or later, +and versions 0.3.x require Python version 2.2 or later. If you don't +know what version of python you are running, type in "python -V" from +the shell. I'm sorry if this is inconvenient, but rdiff-backup uses +generators, iterators, nested scoping, and static/class methods +extensively, and these were only added in version 2.2. + +
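    A version check along these lines shows whether the interpreter is
new enough (an illustrative sketch only, not code from rdiff-backup
itself):

    import sys
    # rdiff-backup 0.3.x needs Python 2.2 or later
    if sys.version_info[:2] < (2, 2):
        print "Python %s is too old for rdiff-backup 0.3.x" % sys.version.split()[0]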

    If you have two versions of python installed, and running "python" +defaults to an early version, you'll probably have to change the first +line of the rdiff-backup script. For instance, you could set it to: + +

    +#!/usr/bin/env python2.2
    +
    +
  2. What do the different verbosity levels mean? + +

    There is no formal specification, but here is a rough description +(settings are always cumulative, so 5 displays everything 4 does): + +

    0   No information given
    1   Fatal Errors displayed
    2   Warnings
    3   Important messages, and maybe later some global statistics (default)
    4   Some global settings, miscellaneous messages
    5   Mentions which files were changed
    6   More information on each file processed
    7   More information on various things
    8   All logging is dated
    9   Details on which objects are moving across the connection

  3. Does rdiff-backup run under Windows? + +

    Yes, apparently it is possible. First, follow Jason Piterak's +instructions: + +

    +Subject: Cygwin rdiff-backup
    +From: Jason  Piterak <Jason_Piterak@c-i-s.com>
    +Date: Mon, 4 Feb 2002 16:54:24 -0500 (13:54 PST)
    +To: rdiff-backup@keywest.Stanford.EDU
    +
    +Hello all,
    +  On a lark, I thought I would attempt to get rdiff-backup to work under
    +Windows98 under Cygwin. We have a number of NT/Win2K servers in the field
    +that I'd love to be backing up via rdiff-backup, and this was the start of
    +getting that working. 
    +
    +SUMMARY: 
    +  o You can get all the pieces for rdiff-backup working under Cygwin.
    +  o The backup process works up to the point of writing any files with
    +timestamps.
    +      ... This is because the ':' character is reserved for Alternate Data
    +Stream (ADS) file designations under NTFS.
    +
    +HOW TO GET IT WORKING (to a point, anyway):
    +  o Install Cygwin
    +  o Download the Python 2.2 update through the Cygwin installer and install.
    +  o Download the librsync libraries from the usual place, but before
    +compiling...
    +  o Cygwin does not use/provide glibc. Because of this, you have to repoint
    +some header files in the Makefile:
    +
    +   -- Make sure that you have /usr/include/inttypes.h
    +      redirected to /usr/include/sys/types.h. Do this by:
    +
    +      create a file /usr/include/inttypes.h with the contents:
    +      #include <sys/types.h>
    +  o Put rdiff-backup in your PATH, as you normally would.
    +
    +
    + +Then, whenever you use rdiff-backup (or at least if you are backing up +to or restoring from a Windows system), use the --windows-time-format +switch, which will tell rdiff-backup not to put a colon (":") in a +filename (this option was added after Jason posted his message). + +
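    The renaming that the switch performs can be pictured in two lines
(an illustration of the behavior just described; the real logic lives
in rdiff-backup's time-handling code):

    increment_time = "2001-07-15T04:09:38-07:00"
    print increment_time.replace(":", "_")   # 2001-07-15T04_09_38-07_00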
+ +
+
Ben Escoto
<bescoto@stanford.edu>
+ + +Last modified: Sat Mar 16 13:22:34 PST 2002 + + + diff --git a/rdiff-backup/README b/rdiff-backup/README new file mode 100644 index 0000000..e4430e9 --- /dev/null +++ b/rdiff-backup/README @@ -0,0 +1,16 @@ +Thank you for trying rdiff-backup. + +Remember that you must have Python 2.2 or later and librsync installed +(this means that "python" and "rdiff" should be in your path). To +download, see http://www.python.org and +http://sourceforge.net/projects/rproxy/ respectively. + +For remote operation, rdiff-backup should be installed and in the +PATH on remote system(s) (see man page for more information). + + +If you have the above installed, and it still doesn't work, contact +Ben Escoto <bescoto@stanford.edu>, or post to the mailing list (see +web page at http://www.stanford.edu/~bescoto/rdiff-backup for more +information). + diff --git a/rdiff-backup/TODO b/rdiff-backup/TODO new file mode 100644 index 0000000..510fca0 --- /dev/null +++ b/rdiff-backup/TODO @@ -0,0 +1,8 @@ +Accept a list of files?? + +Security audit + +hardlinks + +Don't produce stack trace which looks like crash/include file name in +logging stats diff --git a/rdiff-backup/dist/makedist b/rdiff-backup/dist/makedist new file mode 100755 index 0000000..20198f4 --- /dev/null +++ b/rdiff-backup/dist/makedist @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +import os, re, shutil, time + +filelist = ["rdiff-backup", "CHANGELOG", "COPYING", "README", "FAQ.html"] + +# Various details about the files must also be specified by the rpm +# spec template. +spec_template = "rdiff-backup.spec" + + +def GetVersion(): + """Return version string by reading in ./rdiff-backup""" + fp = open("rdiff-backup", "r") + match = re.search("Version (.*?) ", fp.read()) + fp.close() + return match.group(1) + +def CopyMan(destination, version): + """Create updated man page at the specified location""" + fp = open(destination, "w") + date = time.strftime("%B %Y", time.localtime(time.time())) + version = "Version "+version + firstline = ('.TH RDIFF-BACKUP 1 "%s" "%s" "User Manuals"\n' % + (date, version)) + fp.write(firstline) + infp = open("rdiff-backup.1", "r") + infp.readline() + fp.write(infp.read()) + fp.close() + infp.close() + +def MakeTar(version): + """Create rdiff-backup tar file""" + tardir = "rdiff-backup-%s" % version + tarfile = "rdiff-backup-%s.tar.gz" % version + os.mkdir(tardir) + for file in filelist: shutil.copyfile(file, os.path.join(tardir, file)) + os.chmod(os.path.join(tardir, "rdiff-backup"), 0755) + CopyMan(os.path.join(tardir, "rdiff-backup.1"), version) + os.system("tar -cvzf %s %s" % (tarfile, tardir)) + shutil.rmtree(tardir) + return tarfile + +def MakeSpecFile(version): + """Create spec file using spec template""" + specfile = "rdiff-backup-%s-1.spec" % version + outfp = open(specfile, "w") + outfp.write("Version: %s\n" % version) + infp = open(spec_template, "r") + outfp.write(infp.read()) + infp.close() + outfp.close() + return specfile + +def Main(): + assert not os.system("./Make") + version = GetVersion() + print "Processing version " + version + tarfile = MakeTar(version) + print "Made tar file " + tarfile + specfile = MakeSpecFile(version) + print "Made specfile " + specfile + +if __name__ == "__main__": Main() + diff --git a/rdiff-backup/dist/makerpm b/rdiff-backup/dist/makerpm new file mode 100755 index 0000000..b3da88d --- /dev/null +++ b/rdiff-backup/dist/makerpm @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +import os, sys, re + +def GetVersion(): + """Return version string by reading in ./rdiff-backup""" + fp = open("rdiff-backup", "r")
+ match = re.search("Version (.*?) ", fp.read()) + fp.close() + return match.group(1) + + +if len(sys.argv) == 1: + specfile = "rdiff-backup-%s-1.spec" % GetVersion() + print "Using specfile %s" % specfile +elif len(sys.argv) == 2: + specfile = sys.argv[1] + print "Using specfile %s" % specfile +else: + print ("%s takes zero or one argument, the name of the rpm spec " + "file" % sys.argv[0]) + sys.exit(1) + +base = ".".join(specfile.split(".")[:-1]) +srcrpm = base+".src.rpm" +noarchrpm = base+".noarch.rpm" +tarfile = "-".join(base.split("-")[:-1]) + ".tar.gz" + +os.system("install -o root -g root -m 644 %s /usr/src/redhat/SOURCES" % + tarfile) +os.system("rpm -ba --sign -vv --target noarch " + specfile) +#os.system("install -o ben -g ben -m 644 /usr/src/redhat/SRPMS/%s ." % srcrpm) +os.system("install -o ben -g ben -m 644 /usr/src/redhat/RPMS/noarch/%s ." % + noarchrpm) diff --git a/rdiff-backup/dist/makeweb b/rdiff-backup/dist/makeweb new file mode 100755 index 0000000..acbca1f --- /dev/null +++ b/rdiff-backup/dist/makeweb @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +import sys, os + +def RunCommand(cmd): + print cmd + os.system(cmd) + +if not sys.argv[1:]: + print 'Call with version number, as in "./makeweb 0.3.1"' + sys.exit(1) + +version = sys.argv[1] +webprefix = "/home/ben/misc/html/mirror/rdiff-backup/" + + +RunCommand("cp *%s* %s" % (version, webprefix)) +RunCommand("rman -f html -r '' rdiff-backup.1 > %srdiff-backup.1.html" + % webprefix) +RunCommand("cp FAQ.html CHANGELOG %s" % webprefix) + + +os.chdir(webprefix) +print "cd ", webprefix +RunCommand("rm latest latest.rpm latest.tar.gz") +RunCommand("ln -s *rpm latest.rpm") +RunCommand("ln -s *tar.gz latest.tar.gz") diff --git a/rdiff-backup/dist/rdiff-backup.spec b/rdiff-backup/dist/rdiff-backup.spec new file mode 100644 index 0000000..40f1e27 --- /dev/null +++ b/rdiff-backup/dist/rdiff-backup.spec @@ -0,0 +1,47 @@ +Summary: A backup prog that combines mirroring with incremental backup +Name: rdiff-backup +Release: 1 +URL: http://www.stanford.edu/~bescoto/rdiff-backup/ +Source: %{name}-%{version}.tar.gz +Copyright: GPL +Group: Applications/Archiving +BuildRoot: %{_tmppath}/%{name}-root +requires: librsync, python >= 2.2 + +%description +rdiff-backup is a script, written in Python, that backs up one +directory to another and is intended to be run periodically (nightly +from cron for instance). The target directory ends up a copy of the +source directory, but extra reverse diffs are stored in the target +directory, so you can still recover files lost some time ago. The idea +is to combine the best features of a mirror and an incremental +backup. rdiff-backup can also operate in a bandwidth efficient manner +over a pipe, like rsync. Thus you can use rdiff-backup and ssh to +securely back a hard drive up to a remote location, and only the +differences from the previous backup will be transmitted. 
+ +%prep +%setup + +%build + +%install +rm -rf $RPM_BUILD_ROOT +mkdir -p $RPM_BUILD_ROOT/usr/bin +mkdir -p $RPM_BUILD_ROOT/usr/share/man/man1 + +install -m 755 rdiff-backup $RPM_BUILD_ROOT/usr/bin/rdiff-backup +install -m 644 rdiff-backup.1 $RPM_BUILD_ROOT/usr/share/man/man1/rdiff-backup.1 +%clean + +%files +%defattr(-,root,root) + +/usr/bin/rdiff-backup +/usr/share/man/man1/rdiff-backup.1.gz +%doc CHANGELOG COPYING README FAQ.html + +%changelog +* Sun Nov 4 2001 Ben Escoto <bescoto@stanford.edu> +- Initial RPM + diff --git a/rdiff-backup/misc/find2dirs b/rdiff-backup/misc/find2dirs new file mode 100755 index 0000000..9d919d3 --- /dev/null +++ b/rdiff-backup/misc/find2dirs @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +from __future__ import generators +import sys, os, stat + +def usage(): + print "Usage: find2dirs dir1 dir2" + print + print "Given the name of two directories, list all the files in both, one" + print "per line, but don't repeat a file even if it is in both directories" + sys.exit(1) + +def getlist(base, ext = ""): + """Return iterator yielding filenames from directory""" + if ext: yield ext + else: yield "." + + fullname = os.path.join(base, ext) + if stat.S_ISDIR(stat.S_IFMT(os.lstat(fullname)[stat.ST_MODE])): + for subfile in os.listdir(fullname): + for fn in getlist(base, os.path.join(ext, subfile)): yield fn + +def main(dir1, dir2): + d = {} + for fn in getlist(dir1): d[fn] = 1 + for fn in getlist(dir2): d[fn] = 1 + for fn in d.keys(): print fn + +if not len(sys.argv) == 3: usage() +else: main(sys.argv[1], sys.argv[2]) diff --git a/rdiff-backup/misc/init_files.py b/rdiff-backup/misc/init_files.py new file mode 100755 index 0000000..1d8651a --- /dev/null +++ b/rdiff-backup/misc/init_files.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +"""init_files.py + +This program makes a number of files of the given size in the +specified directory. + +""" + +import os, stat, sys, math + +if len(sys.argv) > 5 or len(sys.argv) < 4: + print "Usage: init_files [directory name] [file size] [file count] [base]" + print + print "Creates file_count files in directory_name of size file_size." + print "The created directory has a tree type structure where each level" + print "has at most base files or directories in it. Default is 50." + sys.exit(1) + +dirname = sys.argv[1] +filesize = int(sys.argv[2]) +filecount = int(sys.argv[3]) +block_size = 16384 +block = "." * block_size +block_change = "."
* (filesize % block_size) +if len(sys.argv) == 4: base = 50 +else: base = int(sys.argv[4]) + +def make_file(path): + """Make the file at path""" + fp = open(path, "w") + for i in xrange(int(math.floor(filesize/block_size))): fp.write(block) + fp.write(block_change) + fp.close() + +def find_sublevels(count): + """Return number of sublevels required for count files""" + return int(math.ceil(math.log(count)/math.log(base))) + +def make_dir(dir, count): + """Make count files in the directory, making subdirectories if necessary""" + print "Making directory %s with %d files" % (dir, count) + os.mkdir(dir) + level = find_sublevels(count) + assert count <= pow(base, level) + if level == 1: + for i in range(count): make_file(os.path.join(dir, "file%d" %i)) + else: + files_per_subdir = pow(base, level-1) + full_dirs = int(count/files_per_subdir) + assert full_dirs <= base + for i in range(full_dirs): + make_dir(os.path.join(dir, "subdir%d" % i), files_per_subdir) + + change = count - full_dirs*files_per_subdir + assert change >= 0 + if change > 0: + make_dir(os.path.join(dir, "subdir%d" % full_dirs), change) + +def start(dir): + try: os.stat(dir) + except os.error: pass + else: + print "Directory %s already exists, exiting." % dir + sys.exit(1) + + make_dir(dir, filecount) + +start(dirname) diff --git a/rdiff-backup/misc/myrm b/rdiff-backup/misc/myrm new file mode 100755 index 0000000..1d8350f --- /dev/null +++ b/rdiff-backup/misc/myrm @@ -0,0 +1,16 @@ +#!/usr/bin/python + +import sys, os + +curdir = os.getcwd() +os.chdir("../src") +execfile("destructive_stepping.py") +os.chdir(curdir) + +lc = Globals.local_connection + +for filename in sys.argv[1:]: + #print "Deleting %s" % filename + rp = RPath(lc, filename) + if rp.lstat(): rp.delete() + diff --git a/rdiff-backup/misc/remove-comments.py b/rdiff-backup/misc/remove-comments.py new file mode 100644 index 0000000..e24e3ba --- /dev/null +++ b/rdiff-backup/misc/remove-comments.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +"""remove-comments.py + +Given a python program on standard input, spit one out on stdout that +should work the same, but has blank and comment lines removed. + +""" + +import sys, re + +triple_regex = re.compile('"""') + +def eattriple(initial_line_stripped): + """Keep reading until end of doc string""" + assert initial_line_stripped.startswith('"""') + if triple_regex.search(initial_line_stripped[3:]): return + while 1: + line = sys.stdin.readline() + if not line or triple_regex.search(line): break + +while 1: + line = sys.stdin.readline() + if not line: break + stripped = line.strip() + if not stripped: continue + if stripped[0] == "#": continue + if stripped.startswith('"""'): + eattriple(stripped) + continue + sys.stdout.write(line) + diff --git a/rdiff-backup/rdiff-backup.1 b/rdiff-backup/rdiff-backup.1 new file mode 100644 index 0000000..1c4a589 --- /dev/null +++ b/rdiff-backup/rdiff-backup.1 @@ -0,0 +1,283 @@ +.TH RDIFF-BACKUP 1 "AUGUST 2001" "Version 0.2.1" "User Manuals" \" -*- nroff -*- +.SH NAME +rdiff-backup \- rsync-like incremental backup utility +.SH SYNOPSIS +.B rdiff-backup +.BI [ options ] +.BI [[[ user@ ] host1.foo ]:: source_directory ] +.BI [[[ user@ ] host2.foo ]:: destination_directory ] + + +.SH DESCRIPTION +.B rdiff-backup +is a script, written in +.BR python (1) , +that uses the +.BR rdiff (1) +program to back up one directory to another.
The target directory +ends up a copy of the source directory, but extra reverse diffs are +stored in the target directory, so you can still recover files lost +some time ago. The idea is to combine the best features of a mirror +and an incremental backup. rdiff-backup also preserves symlinks, +special files, permissions, uid/gid ownership (if it is running as +root), and modification times. + +.B rdiff-backup +can also operate +in a bandwidth efficient manner over a pipe, like +.BR rsync (1). +Thus you can use ssh and rdiff-backup to securely back a hard drive up +to a remote location, and only the differences will be transmitted. +Using the default settings, rdiff-backup requires that the remote +system accept ssh connections, and that +.B rdiff-backup +is installed in the user's PATH on the remote system. For information +on other options, see the section on +.B REMOTE OPERATION. + + +.SH OPTIONS +.TP +.B -b, --backup-mode +Force backup mode even if first argument appears to be an increment file. +.TP +.B --change-source-perms +If this option is set, rdiff-backup will try to change the mode of any +unreadable files or unreadable/unexecutable directories in the source +directory so it can back them up. It will then restore their original +permissions and mtimes afterwards. +.TP +.BI "--checkpoint-interval " seconds +This option controls how often, in seconds, rdiff-backup checkpoints +its current status. The default is 20. +.TP +.BI "--exclude " regexp +Exclude files matching regexp. This argument can be used multiple times. +.TP +.B "--exclude-device-files" +Exclude all device files. This can be useful for security/permissions +reasons or if rdiff-backup is not handling device files correctly. +.TP +.BI "--exclude-mirror " regexp +Exclude files in the mirror area matching regexp. This argument can +be used multiple times. The rdiff-backup-data directory is +automatically excluded. +.TP +.B --force +Authorize overwriting of a destination directory. rdiff-backup will +generally tell you if it needs this. +.TP +.B "-l, --list-increments" +List the number and date of partial incremental backups contained in +the specified destination directory. +.TP +.B "-m, --mirror-only" +Do not create an rdiff-backup-data directory or make any increments. +In this mode rdiff-backup is similar to rsync (but usually +slower). +.TP +.B --no-resume +Do not resume last aborted backup even if it falls within the resume +window. +.TP +.BI "--remote-cmd " command +This command has been deprecated as of version 0.4.1. Use +--remote-schema instead. +.TP +.BI "--remote-schema " schema +Specify an alternate method of connecting to a remote computer. This +is necessary to get rdiff-backup not to use ssh for remote backups, or +if, for instance, rdiff-backup is not in the PATH on the remote side. +See the +.B REMOTE OPERATION +section for more information. +.TP +.BI "--remove-older-than " time_interval +Remove the incremental backups in the destination directory that have +been around longer than time_interval. The time interval is an +integer followed by the character s, m, h, D, M, or Y, indicating +seconds, minutes, hours, days, months, or years respectively. Thus +32m means 32 minutes, while 1M means one month (30 days). +.TP +.B --resume +Resume the last aborted backup. If no aborted backups are found, exit +with error. +.TP +.BI "--resume-window " seconds +Resume the last aborted backup if it started less than the specified +number of seconds ago. Otherwise start a new backup. The default is +7200 (2 hours).
+
+.TP
+.B --server
+Enter server mode (not to be invoked directly, but instead used by
+another rdiff-backup process on a remote computer).
+.TP
+.BI "--terminal-verbosity " [0-9]
+Select which messages will be displayed to the terminal. If omitted,
+this level defaults to the verbosity level.
+.TP
+.B --test-server
+Test for the presence of a compatible rdiff-backup server as specified
+in the following host::filename argument(s). The filename section
+will be ignored.
+.TP
+.BI -v [0-9] ", --verbosity " [0-9]
+Specify the verbosity level (0 is totally silent, 3 is the default,
+and 9 is noisiest). This determines how much is written to the log
+file.
+.TP
+.B "-V, --version"
+Print the current version and exit.
+.TP
+.B --windows-time-format
+If this option is present, use underscores instead of colons in
+increment files, so 2001-07-15T04:09:38-07:00 becomes
+2001-07-15T04_09_38-07_00. This option may be useful under MS Windows
+NT, which prohibits colons in filenames.
+
+.SH EXAMPLES
+Simplest case---back up directory foo to directory bar, with
+increments in bar/rdiff-backup-data:
+.PP
+.RS
+rdiff-backup foo bar
+.PP
+.RE
+This is exactly the same as the previous example because trailing
+slashes are ignored:
+.PP
+.RS
+rdiff-backup foo/ bar/
+.PP
+.RE
+Back up files from /home/bob to /mnt/backup, leaving increments in
+/mnt/backup/rdiff-backup-data. Do not back up the directory
+/home/bob/tmp or any files in it.
+.PP
+.RS
+rdiff-backup --exclude /home/bob/tmp /home/bob /mnt/backup
+.PP
+.RE
+You can also use regular expressions in the --exclude statements.
+This will skip any files whose full pathnames contain the word
+"cache", or any files whose name is "tmp" or "temp".
+.PP
+.RS
+rdiff-backup --exclude '.*cache.*' --exclude '.*/te?mp' /home/bob /mnt/backup
+.PP
+.RE
+After the previous command has completed, this command will list the
+backups present on the destination disk:
+.PP
+.RS
+rdiff-backup --list-increments /mnt/backup
+.PP
+.RE
+If space is running out on the /mnt/backup directory, older
+incremental backups can be erased. The following command erases
+backup information older than a week:
+.PP
+.RS
+rdiff-backup --remove-older-than 7D /mnt/backup
+.PP
+.RE
+The following reads the file
+important-data.2001-07-15T04:09:38-07:00.dir and restores the
+resulting directory important-data as it was on July 15, 2001,
+calling the new directory "temp". Note that rdiff-backup goes into
+restore mode because it recognizes the suffix of the file. The -v9
+means keep lots of logging information.
+.PP
+.RS
+rdiff-backup -v9 important-data.2001-07-15T04:09:38-07:00.dir temp
+.PP
+.RE
+This command causes rdiff-backup to back up the directory
+/some/local-dir to the directory /whatever/remote-dir on the machine
+hostname.net. It uses ssh to open the necessary pipe to the remote
+copy of rdiff-backup. Here the username on the local machine and on
+hostname.net are the same.
+.PP
+.RS
+rdiff-backup /some/local-dir hostname.net::/whatever/remote-dir
+.PP
+.RE
+This command logs into hostname.net as smith and restores the remote
+increment old-file on a remote computer to the current directory on
+the local computer:
+.PP
+.RS
+rdiff-backup smith@hostname.net::/foo/rdiff-backup-data/increments/bar/old-file.2001-11-09T12:43:53-04:00.diff
+.PP
+.RE
+Back up foo on one remote machine to bar on another. This will
+probably be slower than running rdiff-backup from either machine.
+
+.PP
+.RS
+rdiff-backup smith@host1::foo jones@host2::bar
+.PP
+.RE
+Test to see if the specified ssh command really opens up a working
+rdiff-backup server on the remote side.
+.RS
+rdiff-backup --test-server hostname.net::/this/is/ignored
+.RE
+
+.SH REMOTE OPERATION
+In order to access remote files, rdiff-backup opens up a pipe to a
+copy of rdiff-backup running on the remote machine. Thus rdiff-backup
+must be installed on both ends. To open this pipe, rdiff-backup first
+splits the filename into host_info::pathname. It then substitutes
+host_info into the remote schema, and runs the resulting command,
+reading its input and output.
+.PP
+The default remote schema is 'ssh %s rdiff-backup --server', meaning
+that if the host_info is user@host.net, then rdiff-backup runs 'ssh
+user@host.net rdiff-backup --server'. The '%s' keyword is substituted
+with the host_info. Using --remote-schema, rdiff-backup can invoke an
+arbitrary command in order to open up a remote pipe. For instance,
+.RS
+rdiff-backup --remote-schema 'cd /usr; %s' foo 'rdiff-backup
+--server'::bar
+.RE
+is basically equivalent to (but slower than)
+.RS
+rdiff-backup foo /usr/bar
+.RE
+.PP
+Concerning quoting, if for some reason you need to put two consecutive
+colons in the host_info section of a host_info::pathname argument, or
+in the pathname of a local file, you can quote one of them by
+prepending a backslash. So in 'a\\::b::c', host_info is 'a::b' and
+the pathname is 'c'. Similarly, if you want to refer to a local file
+whose filename contains two consecutive colons, like 'strange::file',
+you'll have to quote one of the colons as in 'strange\\::file'.
+Because the backslash is a quote character in these circumstances, it
+too must be quoted to get a literal backslash, so 'foo\\::\\\\bar'
+evaluates to 'foo::\\bar'. To make things more complicated, because
+the backslash is also a common shell quoting character, you may need
+to type in '\\\\\\\\' at the shell prompt to get a literal backslash
+(if it makes you feel better, I had to type in 8 backslashes to get
+that in this man page...). And finally, to include a literal % in the
+string specified by --remote-schema, quote it with another %, as in
+%%.
+
+.SH BUGS
+Hard linking is not preserved.
+
+rdiff-backup uses the shell command
+.BR mknod (1)
+to back up device files (e.g. /dev/ttyS0), so device files won't be
+handled correctly on systems with non-standard mknod syntax.
+
+.SH AUTHOR
+Ben Escoto
+.PP
+Feel free to ask me questions or send me bug reports, but also check
+out the mailing list mentioned below.
+
+.SH SEE ALSO
+.BR python (1),
+.BR rdiff (1),
+.BR rsync (1),
+.BR ssh (1).
+The main rdiff-backup web page is at
+.IR http://www.stanford.edu/~bescoto/rdiff-backup .
+There is also a mailing list described at
+.IR http://keywest.Stanford.EDU/mailman/listinfo/rdiff-backup .
diff --git a/rdiff-backup/rdiff_backup/connection.py b/rdiff-backup/rdiff_backup/connection.py
new file mode 100644
index 0000000..83fc874
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/connection.py
@@ -0,0 +1,467 @@
+execfile("rdiff.py")
+import types, os, tempfile, cPickle, shutil, traceback
+
+#######################################################################
+#
+# connection - Code that deals with remote execution
+#
+
+class ConnectionError(Exception):
+    pass
+
+class ConnectionQuit(Exception):
+    pass
+
+
+class Connection:
+    """Connection class - represent remote execution
+
+    The idea is that, if c is an instance of this class, c.foo will
+    return the object on the remote side.
For functions, c.foo will + return a function that, when called, executes foo on the remote + side, sending over the arguments and sending back the result. + + """ + def __repr__(self): return self.__str__() + + +class LocalConnection(Connection): + """Local connection + + This is a dummy connection class, so that LC.foo just evaluates to + foo using global scope. + + """ + def __init__(self): + """This prevents two instances of LocalConnection""" + assert not Globals.local_connection + self.conn_number = 0 # changed by SetConnections for server + + def __getattr__(self, name): + try: return globals()[name] + except KeyError: + builtins = globals()["__builtins__"] + try: + if type(builtins) is types.ModuleType: + return builtins.__dict__[name] + else: return builtins[name] + except KeyError: raise NameError, name + + def __setattr__(self, name, value): + globals()[name] = value + + def __delattr__(self, name): + del globals()[name] + + def __str__(self): return "LocalConnection" + + def reval(self, function_string, *args): + return apply(eval(function_string), args) + + def quit(self): pass + +Globals.local_connection = LocalConnection() +Globals.connections.append(Globals.local_connection) +# Following changed by server in SetConnections +Globals.connection_dict[0] = Globals.local_connection + + +class ConnectionRequest: + """Simple wrapper around a PipeConnection request""" + def __init__(self, function_string, num_args): + self.function_string = function_string + self.num_args = num_args + + def __str__(self): + return "ConnectionRequest: %s with %d arguments" % \ + (self.function_string, self.num_args) + + +class LowLevelPipeConnection(Connection): + """Routines for just sending objects from one side of pipe to another + + Each thing sent down the pipe is paired with a request number, + currently limited to be between 0 and 255. The size of each thing + should be less than 2^56. + + Each thing also has a type, indicated by one of the following + characters: + + o - generic object + i - iterator/generator of RORPs + f - file object + b - string + q - quit signal + t - TempFile + R - RPath + r - RORPath only + c - PipeConnection object + + """ + def __init__(self, inpipe, outpipe): + """inpipe is a file-type open for reading, outpipe for writing""" + self.inpipe = inpipe + self.outpipe = outpipe + + def __str__(self): + """Return string version + + This is actually an important function, because otherwise + requests to represent this object would result in "__str__" + being executed on the other side of the connection. 
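+
+        Concretely: PipeConnection defines __getattr__, so if a
+        __str__ lookup ever fell through to it, str(conn) would come
+        back as an EmulateCallable and turn a simple Log.conn call
+        into a remote request.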
+ + """ + return "LowLevelPipeConnection" + + def _put(self, obj, req_num): + """Put an object into the pipe (will send raw if string)""" + Log.conn("sending", obj, req_num) + if type(obj) is types.StringType: self._putbuf(obj, req_num) + elif isinstance(obj, Connection): self._putconn(obj, req_num) + elif isinstance(obj, TempFile): self._puttempfile(obj, req_num) + elif isinstance(obj, RPath): self._putrpath(obj, req_num) + elif isinstance(obj, RORPath): self._putrorpath(obj, req_num) + elif ((hasattr(obj, "read") or hasattr(obj, "write")) + and hasattr(obj, "close")): self._putfile(obj, req_num) + elif hasattr(obj, "next"): self._putiter(obj, req_num) + else: self._putobj(obj, req_num) + + def _putobj(self, obj, req_num): + """Send a generic python obj down the outpipe""" + self._write("o", cPickle.dumps(obj, 1), req_num) + + def _putbuf(self, buf, req_num): + """Send buffer buf down the outpipe""" + self._write("b", buf, req_num) + + def _putfile(self, fp, req_num): + """Send a file to the client using virtual files""" + self._write("f", str(VirtualFile.new(fp)), req_num) + + def _putiter(self, iterator, req_num): + """Put an iterator through the pipe""" + self._write("i", str(VirtualFile.new(RORPIter.ToFile(iterator))), + req_num) + + def _puttempfile(self, tempfile, req_num): + """Put a tempfile into pipe. See _putrpath""" + tf_repr = (tempfile.conn.conn_number, tempfile.base, + tempfile.index, tempfile.data) + self._write("t", cPickle.dumps(tf_repr, 1), req_num) + + def _putrpath(self, rpath, req_num): + """Put an rpath into the pipe + + The rpath's connection will be encoded as its conn_number. It + and the other information is put in a tuple. + + """ + rpath_repr = (rpath.conn.conn_number, rpath.base, + rpath.index, rpath.data) + self._write("R", cPickle.dumps(rpath_repr, 1), req_num) + + def _putrorpath(self, rorpath, req_num): + """Put an rorpath into the pipe + + This is only necessary because if there is a .file attached, + it must be excluded from the pickling + + """ + rorpath_repr = (rorpath.index, rorpath.data) + self._write("r", cPickle.dumps(rorpath_repr, 1), req_num) + + def _putconn(self, pipeconn, req_num): + """Put a connection into the pipe + + A pipe connection is represented just as the integer (in + string form) of its connection number it is *connected to*. 
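+
+        Wire sketch, given the framing in _write below: sending the
+        connection whose conn_number is 3 on request 5 transmits
+        'c' + chr(5) + _l2s(1) + "3", and the receiver resolves "3"
+        through Globals.connection_dict.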
+ + """ + self._write("c", str(pipeconn.conn_number), req_num) + + def _putquit(self): + """Send a string that takes down server""" + self._write("q", "", 255) + + def _write(self, headerchar, data, req_num): + """Write header and then data to the pipe""" + self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data))) + self.outpipe.write(data) + self.outpipe.flush() + + def _read(self, length): + """Read length bytes from inpipe, returning result""" + return self.inpipe.read(length) + + def _s2l(self, s): + """Convert string to long int""" + assert len(s) == 7 + l = 0L + for i in range(7): l = l*256 + ord(s[i]) + return l + + def _l2s(self, l): + """Convert long int to string""" + s = "" + for i in range(7): + l, remainder = divmod(l, 256) + s = chr(remainder) + s + assert remainder == 0 + return s + + def _get(self): + """Read an object from the pipe and return (req_num, value)""" + header_string = self.inpipe.read(9) + assert len(header_string) == 9, \ + "Error reading from pipe (problem probably originated remotely)" + try: + format_string, req_num, length = (header_string[0], + ord(header_string[1]), + self._s2l(header_string[2:])) + except IndexError: raise ConnectionError() + if format_string == "o": result = cPickle.loads(self._read(length)) + elif format_string == "b": result = self._read(length) + elif format_string == "f": + result = VirtualFile(self, int(self._read(length))) + elif format_string == "i": + result = RORPIter.FromFile(BufferedRead( + VirtualFile(self, int(self._read(length))))) + elif format_string == "t": + result = self._gettempfile(self._read(length)) + elif format_string == "r": + result = self._getrorpath(self._read(length)) + elif format_string == "R": result = self._getrpath(self._read(length)) + elif format_string == "c": + result = Globals.connection_dict[int(self._read(length))] + else: + assert format_string == "q", header_string + raise ConnectionQuit("Received quit signal") + Log.conn("received", result, req_num) + return (req_num, result) + + def _getrorpath(self, raw_rorpath_buf): + """Reconstruct RORPath object from raw data""" + index, data = cPickle.loads(raw_rorpath_buf) + return RORPath(index, data) + + def _gettempfile(self, raw_tf_buf): + """Return TempFile object indicated by raw_tf_buf""" + conn_number, base, index, data = cPickle.loads(raw_tf_buf) + return TempFile(Globals.connection_dict[conn_number], + base, index, data) + + def _getrpath(self, raw_rpath_buf): + """Return RPath object indicated by raw_rpath_buf""" + conn_number, base, index, data = cPickle.loads(raw_rpath_buf) + return RPath(Globals.connection_dict[conn_number], base, index, data) + + def _close(self): + """Close the pipes associated with the connection""" + self.outpipe.close() + self.inpipe.close() + + +class PipeConnection(LowLevelPipeConnection): + """Provide server and client functions for a Pipe Connection + + Both sides act as modules that allows for remote execution. For + instance, self.conn.pow(2,8) will execute the operation on the + server side. + + The only difference between the client and server is that the + client makes the first request, and the server listens first. + + """ + def __init__(self, inpipe, outpipe, conn_number = 0): + """Init PipeConnection + + conn_number should be a unique (to the session) integer to + identify the connection. For instance, all connections to the + client have conn_number 0. Other connections can use this + number to route commands to the correct process. 
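+
+        A minimal usage sketch (the pipe objects are whatever
+        SetConnections supplies):
+
+            server side:  PipeConnection(inpipe, outpipe).Server()
+            client side:  conn = PipeConnection(inpipe, outpipe)
+                          conn.pow(2, 8)  # evaluates to 256 remotely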
+ + """ + LowLevelPipeConnection.__init__(self, inpipe, outpipe) + self.conn_number = conn_number + self.unused_request_numbers = {} + for i in range(256): self.unused_request_numbers[i] = None + + def __str__(self): return "PipeConnection %d" % self.conn_number + + def get_response(self, desired_req_num): + """Read from pipe, responding to requests until req_num. + + Sometimes after a request is sent, the other side will make + another request before responding to the original one. In + that case, respond to the request. But return once the right + response is given. + + """ + while 1: + try: req_num, object = self._get() + except ConnectionQuit: + self._put("quitting", self.get_new_req_num()) + return + if req_num == desired_req_num: return object + else: + assert isinstance(object, ConnectionRequest) + self.answer_request(object, req_num) + + def answer_request(self, request, req_num): + """Put the object requested by request down the pipe""" + del self.unused_request_numbers[req_num] + argument_list = [] + for i in range(request.num_args): + arg_req_num, arg = self._get() + assert arg_req_num == req_num + argument_list.append(arg) + try: result = apply(eval(request.function_string), argument_list) + except: result = self.extract_exception() + self._put(result, req_num) + self.unused_request_numbers[req_num] = None + + def extract_exception(self): + """Return active exception""" + Log("Sending back exception: \n" + + "".join(traceback.format_tb(sys.exc_info()[2])), 2) + return sys.exc_info()[1] + + def Server(self): + """Start server's read eval return loop""" + Globals.server = 1 + Globals.connections.append(self) + Log("Starting server", 6) + self.get_response(-1) + + def reval(self, function_string, *args): + """Execute command on remote side + + The first argument should be a string that evaluates to a + function, like "pow", and the remaining are arguments to that + function. + + """ + req_num = self.get_new_req_num() + self._put(ConnectionRequest(function_string, len(args)), req_num) + for arg in args: self._put(arg, req_num) + result = self.get_response(req_num) + self.unused_request_numbers[req_num] = None + if isinstance(result, Exception): raise result + else: return result + + def get_new_req_num(self): + """Allot a new request number and return it""" + if not self.unused_request_numbers: + raise ConnectionError("Exhaused possible connection numbers") + req_num = self.unused_request_numbers.keys()[0] + del self.unused_request_numbers[req_num] + return req_num + + def quit(self): + """Close the associated pipes and tell server side to quit""" + assert not Globals.server + self._putquit() + self._get() + self._close() + + def __getattr__(self, name): + """Intercept attributes to allow for . invocation""" + return EmulateCallable(self, name) + + +class RedirectedConnection(Connection): + """Represent a connection more than one move away + + For instance, suppose things are connected like this: S1---C---S2. + If Server1 wants something done by Server2, it will have to go + through the Client. So on S1's side, S2 will be represented by a + RedirectedConnection. + + """ + def __init__(self, conn_number, routing_number = 0): + """RedirectedConnection initializer + + Returns a RedirectedConnection object for the given + conn_number, where commands are routed through the connection + with the given routing_number. 0 is the client, so the + default shouldn't have to be changed. 
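+
+        Routing sketch for the S1---C---S2 picture above: on S1,
+
+            conn2 = RedirectedConnection(2)
+            conn2.os.getcwd()
+
+        asks connection 0 (the client) to evaluate
+        "Globals.get_dict_val('connection_dict', 2).os.getcwd" on our
+        behalf, as built by __getattr__ below.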
+ + """ + self.conn_number = conn_number + self.routing_number = routing_number + self.routing_conn = Globals.connection_dict[routing_number] + + def __str__(self): + return "RedirectedConnection %d,%d" % (self.conn_number, + self.routing_number) + + def __getattr__(self, name): + return EmulateCallable(self.routing_conn, + "Globals.get_dict_val('connection_dict', %d).%s" + % (self.conn_number, name)) + + +class EmulateCallable: + """This is used by PipeConnection in calls like conn.os.chmod(foo)""" + def __init__(self, connection, name): + self.connection = connection + self.name = name + def __call__(self, *args): + return apply(self.connection.reval, (self.name,) + args) + def __getattr__(self, attr_name): + return EmulateCallable(self.connection, + "%s.%s" % (self.name, attr_name)) + + +class VirtualFile: + """When the client asks for a file over the connection, it gets this + + The returned instance then forwards requests over the connection. + The class's dictionary is used by the server to associate each + with a unique file number. + + """ + #### The following are used by the server + vfiles = {} + counter = 0 + + def getbyid(cls, id): + return cls.vfiles[id] + getbyid = classmethod(getbyid) + + def readfromid(cls, id, length): + return cls.vfiles[id].read(length) + readfromid = classmethod(readfromid) + + def writetoid(cls, id, buffer): + return cls.vfiles[id].write(buffer) + writetoid = classmethod(writetoid) + + def closebyid(cls, id): + fp = cls.vfiles[id] + del cls.vfiles[id] + return fp.close() + closebyid = classmethod(closebyid) + + def new(cls, fileobj): + """Associate a new VirtualFile with a read fileobject, return id""" + count = cls.counter + cls.vfiles[count] = fileobj + cls.counter = count + 1 + return count + new = classmethod(new) + + + #### And these are used by the client + def __init__(self, connection, id): + self.connection = connection + self.id = id + + def read(self, length = -1): + return self.connection.VirtualFile.readfromid(self.id, length) + + def write(self, buf): + return self.connection.VirtualFile.writetoid(self.id, buf) + + def close(self): + return self.connection.VirtualFile.closebyid(self.id) diff --git a/rdiff-backup/rdiff_backup/destructive_stepping.py b/rdiff-backup/rdiff_backup/destructive_stepping.py new file mode 100644 index 0000000..80d274e --- /dev/null +++ b/rdiff-backup/rdiff_backup/destructive_stepping.py @@ -0,0 +1,250 @@ +from __future__ import generators +execfile("rorpiter.py") + +####################################################################### +# +# destructive-stepping - Deal with side effects from traversing trees +# + +class DSRPath(RPath): + """Destructive Stepping RPath + + Sometimes when we traverse the directory tree, even when we just + want to read files, we have to change things, like the permissions + of a file or directory in order to read it, or the file's access + times. This class is like an RPath, but the permission and time + modifications are delayed, so that they can be done at the very + end when they won't be disturbed later. + + """ + def __init__(self, *args): + self.perms_delayed = self.times_delayed = None + RPath.__init__(self, *args) + + def __getstate__(self): + """Return picklable state. 
See RPath __getstate__.""" + assert self.conn is Globals.local_connection # Can't pickle a conn + pickle_dict = {} + for attrib in ['index', 'data', 'perms_delayed', 'times_delayed', + 'newperms', 'newtimes', 'path', 'base']: + if self.__dict__.has_key(attrib): + pickle_dict[attrib] = self.__dict__[attrib] + return pickle_dict + + def __setstate__(self, pickle_dict): + """Set state from object produced by getstate""" + self.conn = Globals.local_connection + for attrib in pickle_dict.keys(): + self.__dict__[attrib] = pickle_dict[attrib] + + def delay_perm_writes(self): + """Signal that permission writing should be delayed until the end""" + self.perms_delayed = 1 + self.newperms = None + + def delay_time_changes(self): + """Signal that time changes should also be delayed until the end""" + self.times_delayed = 1 + self.newtimes = None + + def chmod(self, permissions): + """Change permissions, delaying if self.perms_delayed is set""" + if self.perms_delayed: + self.newperms = 1 + self.data['perms'] = permissions + else: RPath.chmod(self, permissions) + + def chmod_bypass(self, permissions): + """Change permissions without updating the data dictionary""" + self.conn.os.chmod(self.path, permissions) + self.perms_delayed = self.newperms = 1 + + def remember_times(self): + """Mark times as changed so they can be restored later""" + self.times_delayed = self.newtimes = 1 + + def settime(self, accesstime, modtime): + """Change times, delaying if self.times_delayed is set""" + if self.times_delayed: + self.newtimes = 1 + self.data['atime'] = accesstime + self.data['mtime'] = modtime + else: RPath.settime(self, accesstime, modtime) + + def settime_bypass(self, accesstime, modtime): + """Change times without updating data dictionary""" + self.conn.os.utime(self.path, (accesstime, modtime)) + + def setmtime(self, modtime): + """Change mtime, delaying if self.times_delayed is set""" + if self.times_delayed: + self.newtimes = 1 + self.data['mtime'] = modtime + else: RPath.setmtime(self, modtime) + + def setmtime_bypass(self, modtime): + """Change mtime without updating data dictionary""" + self.conn.os.utime(self.path, (time.time(), modtime)) + + def restoretimes(self): + """Write times in self.data back to file""" + RPath.settime(self, self.data['atime'], self.data['mtime']) + + def restoreperms(self): + """Write permissions in self.data back to file""" + RPath.chmod(self, self.data['perms']) + + def write_changes(self): + """Write saved up permission/time changes""" + if not self.lstat(): return # File has been deleted in meantime + + if self.perms_delayed and self.newperms: + self.conn.os.chmod(self.path, self.getperms()) + if self.times_delayed: + if self.data.has_key('atime'): + self.settime_bypass(self.getatime(), self.getmtime()) + elif self.newtimes and self.data.has_key('mtime'): + self.setmtime_bypass(self.getmtime()) + + +class DestructiveStepping: + """Destructive stepping""" + def initialize(dsrpath, source): + """Change permissions of dsrpath, possibly delay writes + + Abort if we need to access something and can't. If the file + is on the source partition, just log warning and return true. + Return false if everything good to go. + + """ + if not source or Globals.change_source_perms: + dsrpath.delay_perm_writes() + + def warn(err): + Log("Received error '%s' when dealing with file %s, skipping..." + % (err, dsrpath.path), 1) + + def abort(): + Log.FatalError("Missing access to file %s - aborting." % + dsrpath.path) + + def try_chmod(perms): + """Try to change the perms. 
If fail, return error.""" + try: dsrpath.chmod_bypass(perms) + except os.error, err: return err + return None + + if dsrpath.isreg() and not dsrpath.readable(): + if source: + if Globals.change_source_perms and dsrpath.isowner(): + err = try_chmod(0400) + if err: + warn(err) + return 1 + else: + warn("No read permissions") + return 1 + elif not Globals.change_mirror_perms or try_chmod(0600): abort() + elif dsrpath.isdir(): + if source and (not dsrpath.readable() or not dsrpath.executable()): + if Globals.change_source_perms and dsrpath.isowner(): + err = try_chmod(0500) + if err: + warn(err) + return 1 + else: + warn("No read or exec permissions") + return 1 + elif not source and not dsrpath.hasfullperms(): + if Globals.change_mirror_perms: try_chmod(0700) + + # Permissions above; now try to preserve access times if necessary + if (source and (Globals.preserve_atime or + Globals.change_source_perms) or + not source): + # These are the circumstances under which we will have to + # touch up a file's times after we are done with it + dsrpath.remember_times() + return None + + def Finalizer(initial_state = None): + """Return a finalizer that can work on an iterator of dsrpaths + + The reason we have to use an IterTreeReducer is that some files + should be updated immediately, but for directories we sometimes + need to update all the files in the directory before finally + coming back to it. + + """ + return IterTreeReducer(lambda x: None, lambda x,y: None, None, + lambda dsrpath, x, y: dsrpath.write_changes(), + initial_state) + + def isexcluded(dsrp, source): + """Return true if given DSRPath is excluded/ignored + + If source = 1, treat as source file, otherwise treat as + destination file. + + """ + if Globals.exclude_device_files and dsrp.isdev(): return 1 + + if source: exclude_regexps = Globals.exclude_regexps + else: exclude_regexps = Globals.exclude_mirror_regexps + + for regexp in exclude_regexps: + if regexp.match(dsrp.path): + Log("Excluding %s" % dsrp.path, 6) + return 1 + return None + + def Iterate_from(baserp, source, starting_index = None): + """Iterate dsrps from baserp, skipping any matching exclude_regexps + + includes only dsrps with indicies greater than starting_index + if starting_index is not None. 
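+
+        Ordering sketch: with starting_index (1, 1), a branch indexed
+        (0,) is skipped outright, a branch indexed (1,) is descended
+        into because it may contain the starting index, and indices
+        greater than (1, 1) are yielded normally.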
+ + """ + def helper_starting_from(dsrpath): + """Like helper, but only start iterating after starting_index""" + if dsrpath.index > starting_index: + # Past starting_index, revert to normal helper + for dsrp in helper(dsrpath): yield dsrp + elif dsrpath.index == starting_index[:len(dsrpath.index)]: + # May encounter starting index on this branch + if (not DestructiveStepping.isexcluded(dsrpath, source) and + not DestructiveStepping.initialize(dsrpath, source)): + if dsrpath.isdir(): + dir_listing = dsrpath.listdir() + dir_listing.sort() + for filename in dir_listing: + for dsrp in helper_starting_from( + dsrpath.append(filename)): + yield dsrp + + def helper(dsrpath): + if (not DestructiveStepping.isexcluded(dsrpath, source) and + not DestructiveStepping.initialize(dsrpath, source)): + yield dsrpath + if dsrpath.isdir(): + dir_listing = dsrpath.listdir() + dir_listing.sort() + for filename in dir_listing: + for dsrp in helper(dsrpath.append(filename)): + yield dsrp + + base_dsrpath = DSRPath(baserp.conn, baserp.base, + baserp.index, baserp.data) + if starting_index is None: return helper(base_dsrpath) + else: return helper_starting_from(base_dsrpath) + + def Iterate_with_Finalizer(baserp, source): + """Like Iterate_from, but finalize each dsrp afterwards""" + finalize = DestructiveStepping.Finalizer() + for dsrp in DestructiveStepping.Iterate_from(baserp, source): + yield dsrp + finalize(dsrp) + finalize.getresult() + + +MakeStatic(DestructiveStepping) diff --git a/rdiff-backup/rdiff_backup/filelist.py b/rdiff-backup/rdiff_backup/filelist.py new file mode 100644 index 0000000..7a660c3 --- /dev/null +++ b/rdiff-backup/rdiff_backup/filelist.py @@ -0,0 +1,106 @@ +from __future__ import generators +execfile("manage.py") + +####################################################################### +# +# filelist - Some routines that help with operations over files listed +# in standard input instead of over whole directories. +# + +class FilelistError(Exception): pass + +class Filelist: + """Many of these methods have analogs in highlevel.py""" + def File2Iter(fp, baserp): + """Convert file obj with one pathname per line into rpiter + + Closes fp when done. Given files are added to baserp. + + """ + while 1: + line = fp.readline() + if not line: break + if line[-1] == "\n": line = line[:-1] # strip trailing newline + if not line: continue # skip blank lines + elif line[0] == "/": raise FilelistError( + "Read in absolute file name %s." % line) + yield baserp.append(line) + assert not fp.close(), "Error closing filelist fp" + + def Mirror(src_rpath, dest_rpath, rpiter): + """Copy files in fileiter from src_rpath to dest_rpath""" + sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter) + diffiter = Filelist.get_diffs(src_rpath, sigiter) + dest_rpath.conn.Filelist.patch(dest_rpath, diffiter) + dest_rpath.setdata() + + def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath): + """Mirror + put increment in tree based at inc_rpath""" + sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter) + diffiter = Filelist.get_diffs(src_rpath, sigiter) + dest_rpath.conn.Filelist.patch_and_increment(dest_rpath, diffiter, + inc_rpath) + dest_rpath.setdata() + + def get_sigs(dest_rpbase, rpiter): + """Get signatures of file analogs in rpiter + + This is meant to be run on the destination side. Only the + extention part of the rps in rpiter will be used; the base is + ignored. 
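+
+        Sketch: if dest_rpbase is the mirror root and a source rp
+        carries index ('home', 'bob'), its signature is computed from
+        dest_rpbase.new_index(('home', 'bob')), whatever base the
+        source rp was built from.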
+ + """ + def dest_iter(src_iter): + for src_rp in src_iter: yield dest_rpbase.new_index(src_rp.index) + return RORPIter.Signatures(dest_iter()) + + def get_diffs(src_rpbase, sigiter): + """Get diffs based on sigiter and files in src_rpbase + + This should be run on the local side. + + """ + for sig_rorp in sigiter: + new_rp = src_rpbase.new_index(sig_rorp.index) + yield RORPIter.diffonce(sig_rorp, new_rp) + + def patch(dest_rpbase, diffiter): + """Process diffs in diffiter and update files in dest_rbpase. + + Run remotely. + + """ + for diff_rorp in diffiter: + basisrp = dest_rpbase.new_index(diff_rorp.index) + if basisrp.lstat(): Filelist.make_subdirs(basisrp) + Log("Processing %s" % basisrp.path, 7) + RORPIter.patchonce(dest_rpbase, basisrp, diff_rorp) + + def patch_and_increment(dest_rpbase, diffiter, inc_rpbase): + """Apply diffs in diffiter to dest_rpbase, and increment to inc_rpbase + + Also to be run remotely. + + """ + for diff_rorp in diffiter: + basisrp = dest_rpbase.new_index(diff_rorp.index) + if diff_rorp.lstat(): Filelist.make_subdirs(basisrp) + Log("Processing %s" % basisrp.path, 7) + # XXX This isn't done yet... + + def make_subdirs(rpath): + """Make sure that all the directories under the rpath exist + + This function doesn't try to get the permissions right on the + underlying directories, just do the minimum to make sure the + file can be created. + + """ + dirname = rpath.dirsplit()[0] + if dirname == '.' or dirname == '': return + dir_rp = RPath(rpath.conn, dirname) + Filelist.make_subdirs(dir_rp) + if not dir_rp.lstat(): dir_rp.mkdir() + + +MakeStatic(Filelist) diff --git a/rdiff-backup/rdiff_backup/header.py b/rdiff-backup/rdiff_backup/header.py new file mode 100644 index 0000000..31b3ff0 --- /dev/null +++ b/rdiff-backup/rdiff_backup/header.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# +# rdiff-backup -- Mirror files while keeping incremental changes +# Version 0.6.0 released March 14, 2002 +# Copyright (C) 2001 Ben Escoto +# +# This program is licensed under the GNU General Public License (GPL). +# Distributions of rdiff-backup usually include a copy of the GPL in a +# file called COPYING. The GPL is also available online at +# http://www.gnu.org/copyleft/gpl.html. +# +# Please send mail to me or the mailing list if you find bugs or have +# any suggestions. + +from __future__ import nested_scopes, generators +import os, stat, time, sys, getopt, re, cPickle, types, shutil, sha, marshal, traceback, popen2, tempfile + + diff --git a/rdiff-backup/rdiff_backup/highlevel.py b/rdiff-backup/rdiff_backup/highlevel.py new file mode 100644 index 0000000..55fe007 --- /dev/null +++ b/rdiff-backup/rdiff_backup/highlevel.py @@ -0,0 +1,288 @@ +from __future__ import generators +execfile("filelist.py") + +####################################################################### +# +# highlevel - High level functions for mirroring, mirror & inc, etc. +# + +class SkipFileException(Exception): + """Signal that the current file should be skipped but then continue + + This exception will often be raised when there is problem reading + an individual file, but it makes sense for the rest of the backup + to keep going. + + """ + pass + + +class HighLevel: + """High level static functions + + The design of some of these functions is represented on the + accompanying diagram. + + """ + def Mirror(src_rpath, dest_rpath, checkpoint = 1, session_info = None): + """Turn dest_rpath into a copy of src_rpath + + Checkpoint true means to checkpoint periodically, otherwise + not. 
If session_info is given, try to resume mirroring from
+        that point.
+
+        """
+        SourceS = src_rpath.conn.HLSourceStruct
+        DestS = dest_rpath.conn.HLDestinationStruct
+
+        SourceS.set_session_info(session_info)
+        DestS.set_session_info(session_info)
+        src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+        dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
+        diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
+        DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint)
+
+        dest_rpath.setdata()
+
+    def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath,
+                             session_info = None):
+        """Mirror + put increments in tree based at inc_rpath"""
+        SourceS = src_rpath.conn.HLSourceStruct
+        DestS = dest_rpath.conn.HLDestinationStruct
+
+        SourceS.set_session_info(session_info)
+        DestS.set_session_info(session_info)
+        if not session_info: dest_rpath.conn.SaveState.touch_last_file()
+        src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+        dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
+        diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
+        DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
+
+        dest_rpath.setdata()
+        inc_rpath.setdata()
+
+    def Restore(rest_time, mirror_base, baseinc_tup, target_base):
+        """Like Restore.RestoreRecursive but check arguments"""
+        if not isinstance(target_base, DSRPath):
+            target_base = DSRPath(target_base.conn, target_base.base,
+                                  target_base.index, target_base.data)
+        Restore.RestoreRecursive(rest_time, mirror_base,
+                                 baseinc_tup, target_base)
+
+MakeStatic(HighLevel)
+
+
+class HLSourceStruct:
+    """Hold info used by HL on the source side"""
+    _session_info = None  # set to si if resuming
+    def set_session_info(cls, session_info):
+        cls._session_info = session_info
+
+    def iterate_from(cls, rpath):
+        """Supply more arguments to DestructiveStepping.Iterate_from"""
+        if cls._session_info:
+            return DestructiveStepping.Iterate_from(rpath, 1,
+                                     cls._session_info.last_index)
+        else: return DestructiveStepping.Iterate_from(rpath, 1)
+
+    def split_initial_dsiter(cls, rpath):
+        """Set iterators of all dsrps from rpath, returning one"""
+        dsiter = cls.iterate_from(rpath)
+        initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
+        return initial_dsiter1
+
+    def get_diffs_and_finalize(cls, sigiter):
+        """Return diffs and finalize any dsrp changes remaining
+
+        Return a rorpiterator (with file data included) of diffs for
+        the dissimilar files described by sigiter. This is the last
+        operation run on the local
+        filestream, so finalize dsrp writes.
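+
+        Pipeline sketch, as wired up in HighLevel.Mirror above:
+
+            src_init_dsiter = SourceS.split_initial_dsiter(src_rpath)
+            dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
+            diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
+
+        The signatures arriving here are collated against the second
+        copy of the dsrp stream saved in initial_dsiter2.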
+ + """ + collated = RORPIter.CollateIterators(cls.initial_dsiter2, sigiter) + finalizer = DestructiveStepping.Finalizer() + def diffs(): + for dsrp, dest_sig in collated: + try: + if dest_sig: + if dest_sig.isplaceholder(): yield dest_sig + else: yield RORPIter.diffonce(dest_sig, dsrp) + if dsrp: finalizer(dsrp) + except (IOError, OSError, RdiffException): + Log.exception() + Log("Error processing %s, skipping" % + str(dest_sig.index), 2) + finalizer.getresult() + return diffs() + +MakeClass(HLSourceStruct) + + +class HLDestinationStruct: + """Hold info used by HL on the destination side""" + _session_info = None # set to si if resuming + def set_session_info(cls, session_info): + cls._session_info = session_info + + def iterate_from(cls, rpath): + """Supply more arguments to DestructiveStepping.Iterate_from""" + if cls._session_info: + return DestructiveStepping.Iterate_from(rpath, None, + cls._session_info.last_index) + else: return DestructiveStepping.Iterate_from(rpath, None) + + def split_initial_dsiter(cls, rpath): + """Set initial_dsiters (iteration of all dsrps from rpath)""" + dsiter = cls.iterate_from(rpath) + result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) + return result + + def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter): + """Get dissimilars + + Returns an iterator which enumerates the dsrps which are + different on the source and destination ends. The dsrps do + not necessarily exist on the destination end. + + Also, to prevent the system from getting backed up on the + remote end, if we don't get enough dissimilars, stick in a + placeholder every so often, like fiber. The more + placeholders, the more bandwidth used, but if there aren't + enough, lots of memory will be used because files will be + accumulating on the source side. How much will accumulate + will depend on the Globals.conn_bufsize value. + + """ + collated = RORPIter.CollateIterators(src_init_iter, dest_init_iter) + def generate_dissimilar(): + counter = 0 + for src_rorp, dest_dsrp in collated: + if not dest_dsrp: + dsrp = DSRPath(baserp.conn, baserp.base, src_rorp.index) + if dsrp.lstat(): + Log("Warning: Found unexpected destination file %s." 
+ % dsrp.path, 2) + if DestructiveStepping.isexcluded(dsrp, None): continue + counter = 0 + yield dsrp + elif not src_rorp or not src_rorp == dest_dsrp: + counter = 0 + yield dest_dsrp + else: # source and destinition both exist and are same + if counter == 20: + placeholder = RORPath(src_rorp.index) + placeholder.make_placeholder() + counter = 0 + yield placeholder + else: counter += 1 + return generate_dissimilar() + + def get_sigs(cls, baserp, src_init_iter): + """Return signatures of all dissimilar files""" + dest_iters1 = cls.split_initial_dsiter(baserp) + dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1) + return RORPIter.Signatures(dissimilars) + + def get_dsrp(cls, dest_rpath, index): + """Return initialized dsrp based on dest_rpath with given index""" + dsrp = DSRPath(dest_rpath.conn, dest_rpath.base, index) + DestructiveStepping.initialize(dsrp, None) + return dsrp + + def get_finalizer(cls): + """Return finalizer, starting from session info if necessary""" + init_state = cls._session_info and cls._session_info.finalizer_state + return DestructiveStepping.Finalizer(init_state) + + def get_ITR(cls, inc_rpath): + """Return ITR, starting from state if necessary""" + init_state = cls._session_info and cls._session_info.ITR_state + return Inc.make_patch_increment_ITR(inc_rpath, init_state) + + def patch_and_finalize(cls, dest_rpath, diffs, checkpoint = 1): + """Apply diffs and finalize""" + collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2) + finalizer = cls.get_finalizer() + dsrp = None + + def error_checked(): + """Inner writing loop, check this for errors""" + indexed_tuple = collated.next() + Log("Processing %s" % str(indexed_tuple), 7) + diff_rorp, dsrp = indexed_tuple + if not dsrp: + dsrp = cls.get_dsrp(dest_rpath, diff_rorp.index) + DestructiveStepping.initialize(dsrp, None) + if diff_rorp and not diff_rorp.isplaceholder(): + RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() + finalizer(dsrp) + return dsrp + + try: + while 1: + try: dsrp = cls.check_skip_error(error_checked) + except StopIteration: break + if checkpoint: SaveState.checkpoint_mirror(finalizer, dsrp) + except: cls.handle_last_error(dsrp, finalizer) + finalizer.getresult() + if checkpoint: SaveState.checkpoint_remove() + + def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath): + """Apply diffs, write increment if necessary, and finalize""" + collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2) + finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath) + dsrp = None + + def error_checked(): + """Inner writing loop, catch variety of errors from this""" + indexed_tuple = collated.next() + Log("Processing %s" % str(indexed_tuple), 7) + diff_rorp, dsrp = indexed_tuple + if not dsrp: + dsrp = cls.get_dsrp(dest_rpath, indexed_tuple.index) + DestructiveStepping.initialize(dsrp, None) + indexed_tuple = IndexedTuple(indexed_tuple.index, + (diff_rorp, dsrp)) + if diff_rorp and diff_rorp.isplaceholder(): + indexed_tuple = IndexedTuple(indexed_tuple.index, + (None, dsrp)) + ITR(indexed_tuple) + finalizer(dsrp) + return dsrp + + try: + while 1: + try: dsrp = cls.check_skip_error(error_checked) + except StopIteration: break + SaveState.checkpoint_inc_backup(ITR, finalizer, dsrp) + except: cls.handle_last_error(dsrp, finalizer, ITR) + ITR.getresult() + finalizer.getresult() + SaveState.checkpoint_remove() + + def check_skip_error(cls, thunk): + """Run thunk, catch certain errors skip files""" + try: return thunk() + except (IOError, OSError, 
SkipFileException), exp: + Log.exception() + if (not isinstance(exp, IOError) or + (isinstance(exp, IOError) and + (exp[0] in [2, # Means that a file is missing + 5, # Reported by docv (see list) + 13, # Permission denied IOError + 26] # Requested by Campbell (see list) - + # happens on some NT systems + ))): + Log("Skipping file", 2) + return None + else: raise + + def handle_last_error(cls, dsrp, finalizer, ITR = None): + """If catch fatal error, try to checkpoint before exiting""" + Log.exception(1) + if ITR: SaveState.checkpoint_inc_backup(ITR, finalizer, dsrp, 1) + else: SaveState.checkpoint_mirror(finalizer, dsrp, 1) + SaveState.touch_last_file_definitive() + raise + +MakeClass(HLDestinationStruct) diff --git a/rdiff-backup/rdiff_backup/increment.py b/rdiff-backup/rdiff_backup/increment.py new file mode 100644 index 0000000..4ed6a39 --- /dev/null +++ b/rdiff-backup/rdiff_backup/increment.py @@ -0,0 +1,180 @@ +execfile("destructive_stepping.py") + +####################################################################### +# +# increment - Provides Inc class, which writes increment files +# +# This code is what writes files ending in .diff, .snapshot, etc. +# + +class Inc: + """Class containing increment functions""" + def Increment_action(new, mirror, incpref): + """Main file incrementing function, returns RobustAction + + new is the file on the active partition, + mirror is the mirrored file from the last backup, + incpref is the prefix of the increment file. + + This function basically moves mirror -> incpref. + + """ + if not (new and new.lstat() or mirror.lstat()): + return Robust.null_action # Files deleted in meantime, do nothing + + Log("Incrementing mirror file " + mirror.path, 5) + if ((new and new.isdir()) or mirror.isdir()) and not incpref.isdir(): + incpref.mkdir() + + if not mirror.lstat(): return Inc.makemissing_action(incpref) + elif mirror.isdir(): return Inc.makedir_action(mirror, incpref) + elif new.isreg() and mirror.isreg(): + return Inc.makediff_action(new, mirror, incpref) + else: return Inc.makesnapshot_action(mirror, incpref) + + def Increment(new, mirror, incpref): + Inc.Increment_action(new, mirror, incpref).execute() + + def makemissing_action(incpref): + """Signify that mirror file was missing""" + return RobustAction(lambda: None, + Inc.get_inc_ext(incpref, "missing").touch, + lambda exp: None) + + def makesnapshot_action(mirror, incpref): + """Copy mirror to incfile, since new is quite different""" + snapshotrp = Inc.get_inc_ext(incpref, "snapshot") + return Robust.copy_with_attribs_action(mirror, snapshotrp) + + def makediff_action(new, mirror, incpref): + """Make incfile which is a diff new -> mirror""" + diff = Inc.get_inc_ext(incpref, "diff") + return Robust.chain([Rdiff.write_delta_action(new, mirror, diff), + Robust.copy_attribs_action(mirror, diff)]) + + def makedir_action(mirrordir, incpref): + """Make file indicating directory mirrordir has changed""" + dirsign = Inc.get_inc_ext(incpref, "dir") + def final(): + dirsign.touch() + RPath.copy_attribs(mirrordir, dirsign) + return RobustAction(lambda: None, final, dirsign.delete) + + def get_inc_ext(rp, typestr): + """Return RPath/DSRPath like rp but with inc/time extension + + If the file exists, then probably a previous backup has been + aborted. We then keep asking FindTime to get a time later + than the one that already has an inc file. 
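+
+        Naming sketch: for an rp ending in "foo" with typestr "diff",
+        the candidate is "foo.<timestring>.diff"; while the candidate
+        lstat()s (left over from an aborted session), Resume.FindTime
+        is asked for a later time and a new name is tried.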
+ + """ + def get_newinc(timestr): + """Get new increment rp with given time suffix""" + addtostr = lambda s: "%s.%s.%s" % (s, timestr, typestr) + if rp.index: + return rp.__class__(rp.conn, rp.base, rp.index[:-1] + + (addtostr(rp.index[-1]),)) + else: return rp.__class__(rp.conn, addtostr(rp.base), rp.index) + + inctime = 0 + while 1: + inctime = Resume.FindTime(rp.index, inctime) + incrp = get_newinc(Time.timetostring(inctime)) + if not incrp.lstat(): return incrp + + def make_patch_increment_ITR(inc_rpath, initial_state = None): + """Return IterTreeReducer that patches and increments + + This has to be an ITR because directories that have files in + them changed are flagged with an increment marker. There are + four possibilities as to the order: + + 1. Normal file -> Normal file: right away + 2. Directory -> Directory: wait until files in the directory + are processed, as we won't know whether to add a marker + until the end. + 3. Normal file -> Directory: right away, so later files will + have a directory to go into. + 4. Directory -> Normal file: Wait until the end, so we can + process all the files in the directory. + + """ + def base_init(indexed_tuple): + """Patch if appropriate, return (a,b) tuple + + a is true if found directory and thus didn't take action + + if a is false, b is true if some changes were made + + if a is true, b is the rp of a temporary file used to hold + the diff_rorp's data (for dir -> normal file change), and + false if none was necessary. + + """ + diff_rorp, dsrp = indexed_tuple + incpref = inc_rpath.new_index(indexed_tuple.index) + if dsrp.isdir(): return init_dir(dsrp, diff_rorp, incpref) + else: return init_non_dir(dsrp, diff_rorp, incpref) + + def init_dir(dsrp, diff_rorp, incpref): + """Initial processing of a directory + + Make the corresponding directory right away, but wait + until the end to write the replacement. However, if the + diff_rorp contains data, we must write it locally before + continuing, or else that data will be lost in the stream. + + """ + if not (incpref.lstat() and incpref.isdir()): incpref.mkdir() + if diff_rorp and diff_rorp.isreg() and diff_rorp.file: + tf = TempFileManager.new(dsrp) + RPathStatic.copy_with_attribs(diff_rorp, tf) + tf.set_attached_filetype(diff_rorp.get_attached_filetype()) + return (1, tf) + else: return (1, None) + + def init_non_dir(dsrp, diff_rorp, incpref): + """Initial processing of non-directory + + If a reverse diff is called for it is generated by apply + the forwards diff first on a temporary file. 
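+
+            Order-of-operations sketch for the regular-file case: the
+            forward diff is applied to dsrp to produce tf (the new
+            version), Inc.Increment_action(tf, dsrp, incpref) writes
+            the reverse diff as the increment, and the surrounding
+            robust action then installs tf in place of dsrp.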
+ + """ + if diff_rorp: + if dsrp.isreg() and diff_rorp.isreg(): + tf = TempFileManager.new(dsrp) + def init_thunk(): + Rdiff.patch_with_attribs_action(dsrp, diff_rorp, + tf).execute() + Inc.Increment_action(tf, dsrp, incpref).execute() + Robust.make_tf_robustaction(init_thunk, (tf,), + (dsrp,)).execute() + else: + Robust.chain([Inc.Increment_action(diff_rorp, dsrp, + incpref), + RORPIter.patchonce_action( + None, dsrp, diff_rorp)]).execute() + return (None, 1) + return (None, None) + + def base_final(base_tuple, base_init_tuple, changed): + """Patch directory if not done, return true iff made change""" + if base_init_tuple[0]: # was directory + diff_rorp, dsrp = base_tuple + if changed or diff_rorp: + if base_init_tuple[1]: diff_rorp = base_init_tuple[1] + Inc.Increment(diff_rorp, dsrp, + inc_rpath.new_index(base_tuple.index)) + if diff_rorp: + RORPIter.patchonce_action(None, dsrp, + diff_rorp).execute() + if isinstance(diff_rorp, TempFile): diff_rorp.delete() + return 1 + return None + else: # changed iff base_init_tuple says it was + return base_init_tuple[1] + + return IterTreeReducer(base_init, lambda x,y: x or y, None, + base_final, initial_state) + +MakeStatic(Inc) diff --git a/rdiff-backup/rdiff_backup/iterfile.py b/rdiff-backup/rdiff_backup/iterfile.py new file mode 100644 index 0000000..21629b2 --- /dev/null +++ b/rdiff-backup/rdiff_backup/iterfile.py @@ -0,0 +1,235 @@ +execfile("ttime.py") +import cPickle + +####################################################################### +# +# iterfile - Convert an iterator to a file object and vice-versa +# + +class IterFileException(Exception): pass + +class UnwrapFile: + """Contains some basic methods for parsing a file containing an iter""" + def __init__(self, file): + self.file = file + + def _s2l(self, s): + """Convert string to long int""" + assert len(s) == 7 + l = 0L + for i in range(7): l = l*256 + ord(s[i]) + return l + + def _get(self): + """Return pair (type, data) next in line on the file + + type is a single character which is either "o" for object, "f" + for file, "c" for a continution of a file, or None if no more + data can be read. Data is either the file's data, if type is + "c" or "f", or the actual object if the type is "o". + + """ + header = self.file.read(8) + if not header: return None, None + assert len(header) == 8, "Header is only %d bytes" % len(header) + type, length = header[0], self._s2l(header[1:]) + buf = self.file.read(length) + if type == "o": return type, cPickle.loads(buf) + else: return type, buf + + +class IterWrappingFile(UnwrapFile): + """An iterator generated from a file. + + Initialize with a file type object, and then it will return the + elements of the file in order. + + """ + def __init__(self, file): + UnwrapFile.__init__(self, file) + self.currently_in_file = None + + def __iter__(self): return self + + def next(self): + if self.currently_in_file: + self.currently_in_file.close() # no error checking by this point + type, data = self._get() + if not type: raise StopIteration + if type == "o": return data + elif type == "f": + file = IterVirtualFile(self, data) + if data: self.currently_in_file = file + else: self.currently_in_file = None + return file + else: raise IterFileException("Bad file type %s" % type) + + +class IterVirtualFile(UnwrapFile): + """Another version of a pretend file + + This is returned by IterWrappingFile when a file is embedded in + the main file that the IterWrappingFile is based around. 
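+
+    Framing sketch: the "f" record that created this object carried
+    the first chunk of data; each later "c" record is a further
+    chunk, and an empty "c" record marks end-of-file, at which point
+    addtobuffer clears iwf.currently_in_file.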
+ + """ + def __init__(self, iwf, initial_data): + """Initializer + + initial_data is the data from the first block of the file. + iwf is the iter wrapping file that spawned this + IterVirtualFile. + + """ + UnwrapFile.__init__(self, iwf.file) + self.iwf = iwf + self.bufferlist = [initial_data] + self.bufferlen = len(initial_data) + self.closed = None + + def check_consistency(self): + l = len("".join(self.bufferlist)) + assert l == self.bufferlen, \ + "Length of IVF bufferlist doesn't match (%s, %s)" % \ + (l, self.bufferlen) + + def read(self, length): + assert not self.closed + if self.iwf.currently_in_file: + while length >= self.bufferlen: + if not self.addtobuffer(): break + + real_len = min(length, self.bufferlen) + combined_buffer = "".join(self.bufferlist) + assert len(combined_buffer) == self.bufferlen, \ + (len(combined_buffer), self.bufferlen) + self.bufferlist = [combined_buffer[real_len:]] + self.bufferlen = self.bufferlen - real_len + return combined_buffer[:real_len] + + def addtobuffer(self): + """Read a chunk from the file and add it to the buffer""" + assert self.iwf.currently_in_file + type, data = self._get() + assert type == "c", "Type is %s instead of c" % type + if data: + self.bufferlen = self.bufferlen + len(data) + self.bufferlist.append(data) + return 1 + else: + self.iwf.currently_in_file = None + return None + + def close(self): + """Currently just reads whats left and discards it""" + while self.iwf.currently_in_file: + self.addtobuffer() + self.bufferlist = [] + self.bufferlen = 0 + self.closed = 1 + + +class FileWrappingIter: + """A file interface wrapping around an iterator + + This is initialized with an iterator, and then converts it into a + stream of characters. The object will evaluate as little of the + iterator as is necessary to provide the requested bytes. + + The actual file is a sequence of marshaled objects, each preceded + by 8 bytes which identifies the following the type of object, and + specifies its length. File objects are not marshalled, but the + data is written in chunks of Globals.blocksize, and the following + blocks can identify themselves as continuations. + + """ + def __init__(self, iter): + """Initialize with iter""" + self.iter = iter + self.bufferlist = [] + self.bufferlen = 0L + self.currently_in_file = None + self.closed = None + + def read(self, length): + """Return next length bytes in file""" + assert not self.closed + while self.bufferlen < length: + if not self.addtobuffer(): break + + combined_buffer = "".join(self.bufferlist) + assert len(combined_buffer) == self.bufferlen + real_len = min(self.bufferlen, length) + self.bufferlen = self.bufferlen - real_len + self.bufferlist = [combined_buffer[real_len:]] + return combined_buffer[:real_len] + + def addtobuffer(self): + """Updates self.bufferlist and self.bufferlen, adding on a chunk + + Returns None if we have reached the end of the iterator, + otherwise return true. 
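+
+        Record sketch: a pickled object is written as
+        "o" + _l2s(len(pickle)) + pickle; a file object starts an "f"
+        record holding its first Globals.blocksize-sized chunk and
+        continues as "c" records, ending with an empty "c" record.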
+ + """ + if self.currently_in_file: + buf = "c" + self.addfromfile() + else: + try: currentobj = self.iter.next() + except StopIteration: return None + if hasattr(currentobj, "read") and hasattr(currentobj, "close"): + self.currently_in_file = currentobj + buf = "f" + self.addfromfile() + else: + pickle = cPickle.dumps(currentobj, 1) + buf = "o" + self._l2s(len(pickle)) + pickle + + self.bufferlist.append(buf) + self.bufferlen = self.bufferlen + len(buf) + return 1 + + def addfromfile(self): + """Read a chunk from the current file and return it""" + buf = self.currently_in_file.read(Globals.blocksize) + if not buf: + assert not self.currently_in_file.close() + self.currently_in_file = None + return self._l2s(len(buf)) + buf + + def _l2s(self, l): + """Convert long int to string of 7 characters""" + s = "" + for i in range(7): + l, remainder = divmod(l, 256) + s = chr(remainder) + s + assert remainder == 0 + return s + + def close(self): self.closed = 1 + + +class BufferedRead: + """Buffer the .read() calls to the given file + + This is used to lessen overhead and latency when a file is sent + over a connection. + + """ + def __init__(self, file): + self.file = file + self.buffer = "" + self.bufsize = Globals.conn_bufsize + + def read(self, l = -1): + if l < 0: # Read as much as possible + result = self.buffer + self.file.read() + self.buffer = "" + return result + + if len(self.buffer) < l: # Try to make buffer as long as l + self.buffer += self.file.read(max(self.bufsize, + l - len(self.buffer))) + actual_size = min(l, len(self.buffer)) + result = self.buffer[:actual_size] + self.buffer = self.buffer[actual_size:] + return result + + def close(self): return self.file.close() diff --git a/rdiff-backup/rdiff_backup/lazy.py b/rdiff-backup/rdiff_backup/lazy.py new file mode 100644 index 0000000..28e92c3 --- /dev/null +++ b/rdiff-backup/rdiff_backup/lazy.py @@ -0,0 +1,343 @@ +from __future__ import generators +execfile("static.py") +import os, stat, types + +####################################################################### +# +# lazy - Define some lazy data structures and functions acting on them +# + +class Iter: + """Hold static methods for the manipulation of lazy iterators""" + + def filter(predicate, iterator): + """Like filter in a lazy functional programming language""" + for i in iterator: + if predicate(i): yield i + + def map(function, iterator): + """Like map in a lazy functional programming language""" + for i in iterator: yield function(i) + + def foreach(function, iterator): + """Run function on each element in iterator""" + for i in iterator: function(i) + + def cat(*iters): + """Lazily concatenate iterators""" + for iter in iters: + for i in iter: yield i + + def cat2(iter_of_iters): + """Lazily concatenate iterators, iterated by big iterator""" + for iter in iter_of_iters: + for i in iter: yield i + + def empty(iter): + """True if iterator has length 0""" + for i in iter: return None + return 1 + + def equal(iter1, iter2, verbose = None, operator = lambda x, y: x == y): + """True if iterator 1 has same elements as iterator 2 + + Use equality operator, or == if it is unspecified. 
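+
+        Sketch: Iter.equal(iter([1, 2]), iter([1, 2])) returns 1;
+        Iter.equal(iter([1]), iter([1, 2])) returns None, since iter2
+        has an element left over after iter1 ends.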
+
+        """
+        for i1 in iter1:
+            try: i2 = iter2.next()
+            except StopIteration:
+                if verbose: print "End when i1 = %s" % i1
+                return None
+            if not operator(i1, i2):
+                if verbose: print "%s not equal to %s" % (i1, i2)
+                return None
+        try: i2 = iter2.next()
+        except StopIteration: return 1
+        if verbose: print "End when i2 = %s" % i2
+        return None
+
+    def Or(iter):
+        """True if any element in iterator is true.  Short circuiting"""
+        i = None
+        for i in iter:
+            if i: return i
+        return i
+
+    def And(iter):
+        """True if all elements in iterator are true.  Short circuiting"""
+        i = 1
+        for i in iter:
+            if not i: return i
+        return i
+
+    def len(iter):
+        """Return length of iterator"""
+        i = 0
+        while 1:
+            try: iter.next()
+            except StopIteration: return i
+            i = i+1
+
+    def foldr(f, default, iter):
+        """foldr - the fundamental list recursion operator"""
+        try: next = iter.next()
+        except StopIteration: return default
+        return f(next, Iter.foldr(f, default, iter))
+
+    def foldl(f, default, iter):
+        """foldl - the fundamental list iteration operator"""
+        while 1:
+            try: next = iter.next()
+            except StopIteration: return default
+            default = f(default, next)
+
+    def multiplex(iter, num_of_forks, final_func = None, closing_func = None):
+        """Split a single iterator into a number of streams
+
+        The return value will be a tuple of length num_of_forks, each
+        element of which will be an iterator like iter.  final_func is
+        the function that will be called on each element in iter just
+        as it is being removed from the buffer.  closing_func is called
+        when all the streams are finished.
+
+        """
+        if num_of_forks == 2 and not final_func and not closing_func:
+            im2 = IterMultiplex2(iter)
+            return (im2.yielda(), im2.yieldb())
+        if not final_func: final_func = lambda i: None
+        if not closing_func: closing_func = lambda: None
+
+        # buffer is a list of elements that some iterators need and others
+        # don't
+        buffer = []
+
+        # buffer[forkposition[i]] is the next element yielded by iterator
+        # i.  If it is -1, yield from the original iter
+        starting_forkposition = [-1] * num_of_forks
+        forkposition = starting_forkposition[:]
+        called_closing_func = [None]
+
+        def get_next(fork_num):
+            """Return the next element requested by fork_num"""
+            if forkposition[fork_num] == -1:
+                try: buffer.insert(0, iter.next())
+                except StopIteration:
+                    # call closing_func if necessary
+                    if (forkposition == starting_forkposition and
+                        not called_closing_func[0]):
+                        closing_func()
+                        called_closing_func[0] = 1 # mark it as called
+                    raise StopIteration
+                for i in range(num_of_forks): forkposition[i] += 1
+
+            return_val = buffer[forkposition[fork_num]]
+            forkposition[fork_num] -= 1
+
+            blen = len(buffer)
+            if not (blen-1) in forkposition:
+                # Last position in buffer no longer needed
+                assert forkposition[fork_num] == blen-2
+                final_func(buffer[blen-1])
+                del buffer[blen-1]
+            return return_val
+
+        def make_iterator(fork_num):
+            while(1): yield get_next(fork_num)
+
+        return tuple(map(make_iterator, range(num_of_forks)))
+
+MakeStatic(Iter)
+
+
+class IterMultiplex2:
+    """Multiplex an iterator into 2 parts
+
+    This is a special optimized case of the Iter.multiplex function,
+    used when there is no closing_func or final_func, and we only want
+    to split it into 2.  By profiling, this is a time-sensitive class.
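+
+    Illustrative usage (a sketch only):
+
+        im = IterMultiplex2(iter([1, 2, 3]))
+        a, b = im.yielda(), im.yieldb()
+        # a and b each independently yield 1, 2, 3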
+
+    """
+    def __init__(self, iter):
+        self.a_leading_by = 0 # How many places a is ahead of b
+        self.buffer = []
+        self.iter = iter
+
+    def yielda(self):
+        """Return first iterator"""
+        buf, iter = self.buffer, self.iter
+        while(1):
+            if self.a_leading_by >= 0: # a is in front, add new element
+                elem = iter.next() # exception will be passed
+                buf.append(elem)
+            else: elem = buf.pop(0) # b is in front, subtract an element
+            self.a_leading_by += 1
+            yield elem
+
+    def yieldb(self):
+        """Return second iterator"""
+        buf, iter = self.buffer, self.iter
+        while(1):
+            if self.a_leading_by <= 0: # b is in front, add new element
+                elem = iter.next() # exception will be passed
+                buf.append(elem)
+            else: elem = buf.pop(0) # a is in front, subtract an element
+            self.a_leading_by -= 1
+            yield elem
+
+
+class IterTreeReducer:
+    """Tree style reducer object for iterator
+
+    The indices of a RORPIter form a tree-type structure.  This class
+    can be used on each element of an iter in sequence and the result
+    will be as if the corresponding tree was reduced.  This tries to
+    bridge the gap between the tree nature of directories and the
+    iterator nature of the connection between hosts and the temporal
+    order in which the files are processed.
+
+    The elements of the iterator are required to have a tuple-style
+    .index, called "indexed elem" below.
+
+    """
+    def __init__(self, base_init, branch_reducer,
+                 branch_base, base_final, initial_state = None):
+        """ITR initializer
+
+        base_init is a function of one argument, an indexed elem.  It
+        is called immediately on any elem in the iterator.  It should
+        return some value of type A.
+
+        branch_reducer and branch_base are used to form a value on a
+        bunch of reduced branches, in the way that a linked list of
+        type C can be folded to form a value of type B.
+
+        base_final is called when leaving a tree.  It takes three
+        arguments: the indexed elem, the output (type A) of base_init,
+        and the output (type B) of branch_reducer on all the branches;
+        it returns a value of type C.
+
+        """
+        self.base_init = base_init
+        self.branch_reducer = branch_reducer
+        self.base_final = base_final
+        self.branch_base = branch_base
+
+        if initial_state: self.setstate(initial_state)
+        else:
+            self.state = IterTreeReducerState(branch_base)
+            self.subreducer = None
+
+    def setstate(self, state):
+        """Update with new state, recursive if necessary"""
+        self.state = state
+        if state.substate: self.subreducer = self.newinstance(state.substate)
+        else: self.subreducer = None
+
+    def getstate(self): return self.state
+
+    def getresult(self):
+        """Return results of calculation"""
+        if not self.state.calculated: self.calculate_final_val()
+        return self.state.final_val
+
+    def intree(self, index):
+        """Return true if index is still in current tree"""
+        return self.state.base_index == index[:len(self.state.base_index)]
+
+    def newinstance(self, state = None):
+        """Return reducer of same type as self
+
+        If state is None, sets substate of self.state, otherwise
+        assume it is already set.
+
+        """
+        new = self.__class__(self.base_init, self.branch_reducer,
+                             self.branch_base, self.base_final, state)
+        if state is None: self.state.substate = new.state
+        return new
+
+    def process_w_subreducer(self, indexed_elem):
+        """Give object to subreducer, if necessary update branch_val"""
+        if not self.subreducer:
+            self.subreducer = self.newinstance()
+        if not self.subreducer(indexed_elem):
+            self.state.branch_val = self.branch_reducer(self.state.branch_val,
+                                                        self.subreducer.getresult())
+            self.subreducer = self.newinstance()
+            assert self.subreducer(indexed_elem)
+
+    def calculate_final_val(self):
+        """Set final value"""
+        if self.subreducer:
+            self.state.branch_val = self.branch_reducer(self.state.branch_val,
+                                                        self.subreducer.getresult())
+        if self.state.current_index is None:
+            # No input, set None as default value
+            self.state.final_val = None
+        else:
+            self.state.final_val = self.base_final(self.state.base_elem,
+                                                   self.state.base_init_val,
+                                                   self.state.branch_val)
+        self.state.calculated = 1
+
+    def __call__(self, indexed_elem):
+        """Process elem, current position in iterator
+
+        Returns true if elem successfully processed, false if elem is
+        not in the current tree and thus the final result is
+        available.
+
+        """
+        index = indexed_elem.index
+        assert type(index) is types.TupleType
+
+        if self.state.current_index is None: # must be at base
+            self.state.base_init_val = self.base_init(indexed_elem)
+            # Do most crash-prone op first, so we don't leave things
+            # inconsistent
+            self.state.current_index = index
+            self.state.base_index = index
+            self.state.base_elem = indexed_elem
+            return 1
+        elif not index > self.state.current_index:
+            Log("Warning: oldindex %s >= newindex %s" %
+                (self.state.current_index, index), 2)
+
+        if not self.intree(index):
+            self.calculate_final_val()
+            return None
+        else:
+            self.process_w_subreducer(indexed_elem)
+            self.state.current_index = index
+            return 1
+
+
+class IterTreeReducerState:
+    """Holds the state for IterTreeReducers
+
+    An IterTreeReducer cannot be pickled directly because it holds
+    some anonymous functions.  This class contains the relevant data
+    that is likely to be picklable, so the ITR can be saved and loaded
+    if the associated functions are known.
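+
+    Illustrative checkpoint/resume sketch (names are examples only):
+
+        state_string = cPickle.dumps(itr.getstate())       # checkpoint
+        ...
+        itr = IterTreeReducer(base_init, branch_reducer,
+                              branch_base, base_final,
+                              cPickle.loads(state_string)) # resume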
+
+    """
+    def __init__(self, branch_base):
+        """ITRS initializer
+
+        Class variables:
+        self.current_index - last index processing started on, or None
+        self.base_index - index of first element processed
+        self.base_elem - first element processed
+        self.branch_val - default branch reducing value
+
+        self.calculated - true iff the final value has been calculated
+        self.base_init_val - return value of base_init function
+        self.final_val - Final value once it's calculated
+        self.substate - IterTreeReducerState when subreducer active
+
+        """
+        self.current_index = None
+        self.calculated = None
+        self.branch_val = branch_base
+        self.substate = None
+
diff --git a/rdiff-backup/rdiff_backup/log.py b/rdiff-backup/rdiff_backup/log.py
new file mode 100644
index 0000000..5416fd2
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/log.py
@@ -0,0 +1,142 @@
+import time, sys, traceback
+execfile("lazy.py")
+
+#######################################################################
+#
+# log - Manage logging
+#
+
+class LoggerError(Exception): pass
+
+class Logger:
+    """All functions which deal with logging"""
+    def __init__(self):
+        self.log_file_open = None
+        self.log_file_local = None
+        self.verbosity = self.term_verbosity = 3
+        # termverbset is true if the term_verbosity has been explicitly set
+        self.termverbset = None
+
+    def setverbosity(self, verbosity_string):
+        """Set verbosity levels.  Takes a number string"""
+        try: self.verbosity = int(verbosity_string)
+        except ValueError:
+            Log.FatalError("Verbosity must be a number, received '%s' "
+                           "instead." % verbosity_string)
+        if not self.termverbset: self.term_verbosity = self.verbosity
+
+    def setterm_verbosity(self, termverb_string):
+        """Set verbosity to terminal.  Takes a number string"""
+        try: self.term_verbosity = int(termverb_string)
+        except ValueError:
+            Log.FatalError("Terminal verbosity must be a number, received "
+                           "'%s' instead." % termverb_string)
+        self.termverbset = 1
+
+    def open_logfile(self, rpath):
+        """Inform all connections of an open logfile.
+
+        rpath.conn will write to the file, and the others will pass
+        write commands off to it.
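+
+        Illustrative usage (a sketch; rpath is any writable RPath):
+
+            Log.setverbosity("5")
+            Log.open_logfile(rpath)
+            Log("Backup started", 3) # goes to the file and the terminal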
+
+        """
+        for conn in Globals.connections:
+            conn.Log.open_logfile_allconn(rpath.conn)
+        rpath.conn.Log.open_logfile_local(rpath)
+
+    def open_logfile_allconn(self, log_file_conn):
+        """Run on all connections to signal log file is open"""
+        self.log_file_open = 1
+        self.log_file_conn = log_file_conn
+
+    def open_logfile_local(self, rpath):
+        """Open logfile locally - should only be run on one connection"""
+        assert self.log_file_conn is Globals.local_connection
+        self.log_file_local = 1
+        self.logrp = rpath
+        self.logfp = rpath.open("a")
+
+    def close_logfile(self):
+        """Close logfile and inform all connections"""
+        if self.log_file_open:
+            for conn in Globals.connections:
+                conn.Log.close_logfile_allconn()
+            self.log_file_conn.Log.close_logfile_local()
+
+    def close_logfile_allconn(self):
+        """Run on every connection"""
+        self.log_file_open = None
+
+    def close_logfile_local(self):
+        """Run by logging connection - close logfile"""
+        assert self.log_file_conn is Globals.local_connection
+        assert not self.logfp.close()
+
+    def format(self, message, verbosity):
+        """Format the message, possibly adding date information"""
+        if verbosity < 9: return message + "\n"
+        else: return "%s %s\n" % (time.asctime(time.localtime(time.time())),
+                                  message)
+
+    def __call__(self, message, verbosity):
+        """Log message that has verbosity importance"""
+        if verbosity <= self.verbosity: self.log_to_file(message)
+        if verbosity <= self.term_verbosity:
+            self.log_to_term(message, verbosity)
+
+    def log_to_file(self, message):
+        """Write the message to the log file, if possible"""
+        if self.log_file_open:
+            if self.log_file_local:
+                self.logfp.write(self.format(message, self.verbosity))
+            else: self.log_file_conn.Log.log_to_file(message)
+
+    def log_to_term(self, message, verbosity):
+        """Write message to stdout/stderr"""
+        if verbosity <= 2 or Globals.server: termfp = sys.stderr
+        else: termfp = sys.stdout
+        termfp.write(self.format(message, self.term_verbosity))
+
+    def conn(self, direction, result, req_num):
+        """Log some data on the connection
+
+        The main worry with this function is that something in here
+        will create more network traffic, which will spiral into
+        infinite regress.  So, for instance, logging must only be done
+        to the terminal, because otherwise the log file may be remote.
+
+        """
+        if self.term_verbosity < 9: return
+        if type(result) is types.StringType: result_repr = repr(result)
+        else: result_repr = str(result)
+        if Globals.server: conn_str = "Server"
+        else: conn_str = "Client"
+        self.log_to_term("%s %s (%d): %s" %
+                         (conn_str, direction, req_num, result_repr), 9)
+
+    def FatalError(self, message):
+        self("Fatal Error: " + message, 1)
+        Globals.Main.cleanup()
+        sys.exit(1)
+
+    def exception(self, only_terminal = 0):
+        """Log an exception and traceback at verbosity 2
+
+        If only_terminal is 0, log normally.  If it is 1, then only
+        log to disk if log file is local (self.log_file_open = 1).  If
+        it is 2, don't log to disk at all.
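+
+        Typical use (illustrative; do_something is a placeholder):
+
+            try: do_something()
+            except: Log.exception()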
+
+        """
+        assert only_terminal in (0, 1, 2)
+        if (only_terminal == 0 or
+            (only_terminal == 1 and self.log_file_open)):
+            logging_func = self.__call__
+        else: logging_func = self.log_to_term
+
+        exc_info = sys.exc_info()
+        logging_func("Exception %s raised of class %s" %
+                     (exc_info[1], exc_info[0]), 2)
+        logging_func("".join(traceback.format_tb(exc_info[2])), 2)
+
+
+Log = Logger()
diff --git a/rdiff-backup/rdiff_backup/manage.py b/rdiff-backup/rdiff_backup/manage.py
new file mode 100644
index 0000000..c0f4a85
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/manage.py
@@ -0,0 +1,99 @@
+execfile("restore.py")
+
+#######################################################################
+#
+# manage - list, delete, and otherwise manage increments
+#
+
+class ManageException(Exception): pass
+
+class Manage:
+    def get_incobjs(datadir):
+        """Return a list of IncObjs given the rdiff-backup data directory"""
+        return map(IncObj, Manage.find_incrps_with_base(datadir, "increments"))
+
+    def find_incrps_with_base(dir_rp, basename):
+        """Return list of incfiles with given basename in dir_rp"""
+        rps = map(dir_rp.append, dir_rp.listdir())
+        incrps = filter(RPath.isincfile, rps)
+        result = filter(lambda rp: rp.getincbase_str() == basename, incrps)
+        Log("find_incrps_with_base: found %d incs" % len(result), 6)
+        return result
+
+    def describe_root_incs(datadir):
+        """Return a string describing all the root increments"""
+        result = []
+        currentrps = Manage.find_incrps_with_base(datadir, "current_mirror")
+        if not currentrps:
+            Log("Warning: no current mirror marker found", 1)
+        elif len(currentrps) > 1:
+            Log("Warning: multiple mirror markers found", 1)
+        for rp in currentrps:
+            result.append("Found mirror marker %s" % rp.path)
+            result.append("Indicating latest mirror taken at %s" %
+                          Time.stringtopretty(rp.getinctime()))
+        result.append("---------------------------------------------"
+                      "-------------")
+
+        # Sort so they are in reverse order by time
+        time_w_incobjs = map(lambda io: (-io.time, io),
+                             Manage.get_incobjs(datadir))
+        time_w_incobjs.sort()
+        incobjs = map(lambda x: x[1], time_w_incobjs)
+        result.append("Found %d increments:" % len(incobjs))
+        result.append("\n------------------------------------------\n".join(
+            map(IncObj.full_description, incobjs)))
+        return "\n".join(result)
+
+    def delete_earlier_than(baserp, time):
+        """Delete increments older than time in directory baserp
+
+        time is in seconds.  It will then delete any empty directories
+        in the tree.  To process the entire backup area, the
+        rdiff-backup-data directory should be the root of the tree.
+
+        """
+        def yield_files(rp):
+            yield rp
+            if rp.isdir():
+                for filename in rp.listdir():
+                    for sub_rp in yield_files(rp.append(filename)):
+                        yield sub_rp
+
+        for rp in yield_files(baserp):
+            if ((rp.isincfile() and
+                 Time.stringtotime(rp.getinctime()) < time) or
+                (rp.isdir() and not rp.listdir())):
+                Log("Deleting increment file %s" % rp.path, 5)
+                rp.delete()
+
+MakeStatic(Manage)
+
+
+class IncObj:
+    """Increment object - represent a completed increment"""
+    def __init__(self, incrp):
+        """IncObj initializer
+
+        incrp is an RPath of a path like increments.TIMESTR.dir
+        standing for the root of the increment.
+
+        """
+        if not incrp.isincfile():
+            raise ManageException("%s is not an inc file" % incrp.path)
+        self.incrp = incrp
+        self.time = Time.stringtotime(incrp.getinctime())
+
+    def getbaserp(self):
+        """Return rp of the incrp without extensions"""
+        return self.incrp.getincbase()
+
+    def pretty_time(self):
+        """Return a formatted version of inc's time"""
+        return Time.timetopretty(self.time)
+
+    def full_description(self):
+        """Return string describing increment"""
+        s = ["Increment file %s" % self.incrp.path,
+             "Date: %s" % self.pretty_time()]
+        return "\n".join(s)
diff --git a/rdiff-backup/rdiff_backup/restore.py b/rdiff-backup/rdiff_backup/restore.py
new file mode 100644
index 0000000..1f7d24e
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/restore.py
@@ -0,0 +1,158 @@
+from __future__ import generators
+execfile("increment.py")
+import tempfile
+
+#######################################################################
+#
+# restore - Read increment files and restore to original
+#
+
+class RestoreError(Exception): pass
+
+class Restore:
+    def RestoreFile(rest_time, rpbase, inclist, rptarget):
+        """Non-recursive restore function
+
+        rest_time is the time in seconds to restore to,
+        rpbase is the base rpath of the file being restored,
+        inclist is a list of rpaths containing all the relevant increments,
+        and rptarget is the rpath that will be written with the restored file.
+
+        """
+        inclist = Restore.sortincseq(rest_time, inclist)
+        if not inclist and not (rpbase and rpbase.lstat()):
+            return # no increments were applicable
+        Log("Restoring %s with increments %s to %s" %
+            (rpbase and rpbase.path,
+             Restore.inclist2str(inclist), rptarget.path), 5)
+        if not inclist or inclist[0].getinctype() == "diff":
+            assert rpbase and rpbase.lstat(), \
+                   "No base to go with incs %s" % Restore.inclist2str(inclist)
+            RPath.copy_with_attribs(rpbase, rptarget)
+        for inc in inclist: Restore.applyinc(inc, rptarget)
+
+    def inclist2str(inclist):
+        """Return string version of inclist for logging"""
+        return ",".join(map(lambda x: x.path, inclist))
+
+    def sortincseq(rest_time, inclist):
+        """Sort the inc sequence, and throw away irrelevant increments"""
+        incpairs = map(lambda rp: (Time.stringtotime(rp.getinctime()), rp),
+                       inclist)
+        # Only consider increments at or after the time being restored
+        incpairs = filter(lambda pair: pair[0] >= rest_time, incpairs)
+
+        # Now throw away older unnecessary increments
+        incpairs.sort()
+        i = 0
+        while(i < len(incpairs)):
+            # Only diff type increments require later versions
+            if incpairs[i][1].getinctype() != "diff": break
+            i = i+1
+        incpairs = incpairs[:i+1]
+
+        # Return increments in reversed order
+        incpairs.reverse()
+        return map(lambda pair: pair[1], incpairs)
+
+    def applyinc(inc, target):
+        """Apply increment rp inc to targetrp target"""
+        Log("Applying increment %s to %s" % (inc.path, target.path), 6)
+        inctype = inc.getinctype()
+        if inctype == "diff":
+            if not target.lstat():
+                raise RestoreError("Bad increment sequence at " + inc.path)
+            Rdiff.patch_action(target, inc).execute()
+        elif inctype == "dir":
+            if not target.isdir():
+                if target.lstat():
+                    raise RestoreError("File %s already exists" % target.path)
+                target.mkdir()
+        elif inctype == "missing": return
+        elif inctype == "snapshot": RPath.copy(inc, target)
+        else: raise RestoreError("Unknown inctype %s" % inctype)
+        RPath.copy_attribs(inc, target)
+
+    def RestoreRecursive(rest_time, mirror_base, baseinc_tup, target_base):
+        """Recursive restore function.
+
+        rest_time is the time in seconds to restore to;
+        mirror_base is an rpath of the mirror directory corresponding
+        to the one to be restored;
+        baseinc_tup is the inc tuple (incdir, list of incs) to be
+        restored;
+        and target_base is the DSRPath of the target directory.
+
+        """
+        assert isinstance(target_base, DSRPath)
+        collated = RORPIter.CollateIterators(
+            DestructiveStepping.Iterate_from(mirror_base, None),
+            Restore.yield_inc_tuples(baseinc_tup))
+        mirror_finalizer = DestructiveStepping.Finalizer()
+        target_finalizer = DestructiveStepping.Finalizer()
+
+        for mirror, inc_tup in collated:
+            if not inc_tup:
+                inclist = []
+                target = target_base.new_index(mirror.index)
+            else:
+                inclist = inc_tup[1]
+                target = target_base.new_index(inc_tup.index)
+            DestructiveStepping.initialize(target, None)
+            Restore.RestoreFile(rest_time, mirror, inclist, target)
+            target_finalizer(target)
+            if mirror: mirror_finalizer(mirror)
+        target_finalizer.getresult()
+        mirror_finalizer.getresult()
+
+    def yield_inc_tuples(inc_tuple):
+        """Iterate increment tuples starting with inc_tuple
+
+        An increment tuple is an IndexedTuple (pair).  The first will
+        be the rpath of a directory, and the second is a list of all
+        the increments associated with that directory.  If there are
+        increments that do not correspond to a directory, the first
+        element will be None.  All the rpaths involved correspond to
+        files in the increment directory.
+
+        """
+        oldindex, rpath = inc_tuple.index, inc_tuple[0]
+        yield inc_tuple
+        if not rpath or not rpath.isdir(): return
+
+        inc_list_dict = {} # Index tuple lists by index
+        dirlist = rpath.listdir()
+
+        def affirm_dict_indexed(index):
+            """Make sure the inc_list_dict has given index"""
+            if not inc_list_dict.has_key(index):
+                inc_list_dict[index] = [None, []]
+
+        def add_to_dict(filename):
+            """Add filename to the inc tuple dictionary"""
+            rp = rpath.append(filename)
+            if rp.isincfile():
+                basename = rp.getincbase_str()
+                affirm_dict_indexed(basename)
+                inc_list_dict[basename][1].append(rp)
+            elif rp.isdir():
+                affirm_dict_indexed(filename)
+                inc_list_dict[filename][0] = rp
+
+        def list2tuple(index):
+            """Return inc_tuple version of dictionary entry by index"""
+            inclist = inc_list_dict[index]
+            if not inclist[1]: return None # no increments, so ignore
+            return IndexedTuple(oldindex + (index,), inclist)
+
+        for filename in dirlist: add_to_dict(filename)
+        keys = inc_list_dict.keys()
+        keys.sort()
+        for index in keys:
+            new_inc_tuple = list2tuple(index)
+            if not new_inc_tuple: continue
+            elif new_inc_tuple[0]: # corresponds to directory
+                for i in Restore.yield_inc_tuples(new_inc_tuple): yield i
+            else: yield new_inc_tuple
+
+MakeStatic(Restore)
diff --git a/rdiff-backup/rdiff_backup/rlist.py b/rdiff-backup/rdiff_backup/rlist.py
new file mode 100644
index 0000000..c0f8ee9
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/rlist.py
@@ -0,0 +1,240 @@
+from __future__ import generators
+import marshal, sha, types
+execfile("iterfile.py")
+
+#######################################################################
+#
+# rlist - Define the CachingIter, and sig/diff/patch ops on iterators
+#
+
+class CachingIter:
+    """Cache parts of an iter using a list
+
+    Turn an iter into something that you can prepend elements into,
+    and also read from without apparently changing the state.
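+
+    Illustrative usage (a sketch only):
+
+        ci = CachingIter(iter([2, 3]))
+        ci.push(1)    # prepend an element
+        ci.peek()     # => 1, leaves the element in place
+        list(ci)      # => [1, 2, 3]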
+
+    """
+    def __init__(self, iter_or_list):
+        if type(iter_or_list) is types.ListType:
+            self.iter = iter(iter_or_list)
+        else: self.iter = iter_or_list
+        self.next = self.iter.next
+        self.head = []
+
+    def __iter__(self): return self
+
+    def _next(self):
+        """Take elements from the head list
+
+        When there are elements waiting before the main iterator, this
+        is the next function.  If not, iter.next returns to being next.
+
+        """
+        head = self.head
+        a = head[0]
+        del head[0]
+        if not head: self.next = self.iter.next
+        return a
+
+    def nextrange(self, m):
+        """Return next m elements in list"""
+        head = self.head
+        l = head[:m]
+        del head[:m]
+        if not head: self.next = self.iter.next
+        for i in xrange(m - len(l)): l.append(self.iter.next())
+        return l
+
+    def peek(self):
+        """Return next element without removing it from iterator"""
+        n = self.next()
+        self.push(n)
+        return n
+
+    def push(self, elem):
+        """Insert an element into the iterator at the beginning"""
+        if not self.head: self.next = self._next
+        self.head.insert(0, elem)
+
+    def pushrange(self, elem_list):
+        """Insert list of multiple elements at the beginning"""
+        if not self.head: self.next = self._next
+        self.head[:0] = elem_list
+
+    def cache(self, m):
+        """Move next m elements from iter to internal list
+
+        If m is None, append the entire rest of the iterator.
+
+        """
+        h, it = self.head, self.iter
+        if m is None:
+            for i in it: h.append(i)
+        else:
+            for i in xrange(m): h.append(it.next())
+        if h: self.next = self._next # serve cached elements first
+
+    def __getitem__(self, key):
+        """Support a[i:j] style notation.  Non destructive"""
+        if type(key) is types.SliceType:
+            if key.stop > len(self.head): self.cache(key.stop - len(self.head))
+            return self.head[key.start:key.stop]
+        else:
+            if key >= len(self.head): self.cache(key + 1 - len(self.head))
+            return self.head[key]
+
+
+
+class RListDelta:
+    """Note a difference from one iterator (A) to another (B)
+
+    The min, max pairs are indices which stand for the half-open
+    interval (min, max], and elemlist is a list of all the elements in
+    A which fall within this interval.
+
+    These are produced by the function RList.Deltas(...)
+
+    """
+    def __init__(self, (min, max), elemlist):
+        self.min, self.max = min, max
+        self.elemlist = elemlist
+
+
+
+class RList:
+    """Tools for signatures, diffing, and patching an iterator
+
+    This class requires that the iterators involved are yielding
+    objects that have .index and .data attributes.  Two objects with
+    the same .data attribute are supposed to be equivalent.  The
+    iterator must also yield the objects in increasing order with
+    respect to the .index attribute.
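+
+    Illustrative round trip (a sketch only; names are examples):
+
+        sigs = RList.Signatures(iter_a)             # on one side
+        deltas = RList.Deltas(sigs, iter_b)         # on the other
+        iter_a_again = RList.Patch(iter_b2, deltas) # reconstruct A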
+
+    """
+    blocksize = 100
+
+    def Signatures(iter):
+        """Return iterator of signatures from stream of pairs
+
+        Each signature is an ordered pair (last index sig applies to,
+        SHA digest of data)
+
+        """
+        i, s = 0, sha.new()
+        for iter_elem in iter:
+            s.update(marshal.dumps(iter_elem.data))
+            i = i+1
+            if i == RList.blocksize:
+                yield (iter_elem.index, s.digest())
+                i, s = 0, sha.new()
+        if i != 0: yield (iter_elem.index, s.digest())
+
+    def sig_one_block(iter_or_list):
+        """Return the digest portion of a signature on given list"""
+        s = sha.new()
+        for iter_elem in iter_or_list: s.update(marshal.dumps(iter_elem.data))
+        return s.digest()
+
+    def Deltas(remote_sigs, iter):
+        """Return iterator of Delta objects that bring iter to remote"""
+        def get_before(index, iter):
+            """Return elements in iter whose index is before or equal to index
+
+            iter needs to be pushable.
+
+            """
+            l = []
+            while 1:
+                try: iter_elem = iter.next()
+                except StopIteration: return l
+                if iter_elem.index > index: break
+                l.append(iter_elem)
+            iter.push(iter_elem)
+            return l
+
+        if not isinstance(iter, CachingIter): iter = CachingIter(iter)
+        oldindex = None
+        for (rs_index, rs_digest) in remote_sigs:
+            l = get_before(rs_index, iter)
+            if rs_digest != RList.sig_one_block(l):
+                yield RListDelta((oldindex, rs_index), l)
+            oldindex = rs_index
+
+    def patch_once(basis, delta):
+        """Apply one delta to basis to return original iterator
+
+        This returns the original iterator up to and including the max
+        range of delta, then stops.  basis should be pushable.
+
+        """
+        # Return elements of basis until start of delta range
+        for basis_elem in basis:
+            if basis_elem.index > delta.min:
+                basis.push(basis_elem)
+                break
+            yield basis_elem
+
+        # Yield elements of delta...
+        for elem in delta.elemlist: yield elem
+
+        # Finally, discard basis until end of delta range
+        for basis_elem in basis:
+            if basis_elem.index > delta.max:
+                basis.push(basis_elem)
+                break
+
+    def Patch(basis, deltas):
+        """Apply a delta stream to basis iterator, yielding original"""
+        if not isinstance(basis, CachingIter): basis = CachingIter(basis)
+        for d in deltas:
+            for elem in RList.patch_once(basis, d): yield elem
+        for elem in basis: yield elem
+
+    def get_difference_once(basis, delta):
+        """From one delta, find differences from basis
+
+        Will return pairs (basis_elem, new_elem) where basis_elem is
+        the element from the basis iterator and new_elem is the
+        element from the other iterator.  If either is missing, None
+        will take its place; both are present iff the two have the
+        same index.
+
+        """
+        # Discard any elements of basis before delta starts
+        for basis_elem in basis:
+            if basis_elem.index > delta.min:
+                basis.push(basis_elem)
+                break
+
+        # In range, compare each one by one
+        di, boverflow, doverflow = 0, None, None
+        while 1:
+            # Set indices and data, or mark if at end of range already
+            try:
+                basis_elem = basis.next()
+                if basis_elem.index > delta.max:
+                    basis.push(basis_elem)
+                    boverflow = 1
+            except StopIteration: boverflow = 1
+            if di >= len(delta.elemlist): doverflow = 1
+            else: delta_elem = delta.elemlist[di]
+
+            if boverflow and doverflow: break
+            elif boverflow:
+                yield (None, delta_elem)
+                di = di+1
+            elif doverflow: yield (basis_elem, None)
+
+            # Now can assume that everything is in range
+            elif basis_elem.index > delta_elem.index:
+                yield (None, delta_elem)
+                basis.push(basis_elem)
+                di = di+1
+            elif basis_elem.index == delta_elem.index:
+                if basis_elem.data != delta_elem.data:
+                    yield (basis_elem, delta_elem)
+                di = di+1
+            else: yield (basis_elem, None)
+
+    def Dissimilar(basis, deltas):
+        """Return iter of differences from delta iter and basis iter"""
+        if not isinstance(basis, CachingIter): basis = CachingIter(basis)
+        for d in deltas:
+            for triple in RList.get_difference_once(basis, d): yield triple
+
+MakeStatic(RList)
diff --git a/rdiff-backup/rdiff_backup/robust.py b/rdiff-backup/rdiff_backup/robust.py
new file mode 100644
index 0000000..c23ff6a
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/robust.py
@@ -0,0 +1,537 @@
+import tempfile
+execfile("rpath.py")
+
+#######################################################################
+#
+# robust - code which prevents mirror from being corrupted, error-recovery
+#
+# Ideally, no matter how an instance of rdiff-backup gets aborted, no
+# information should be lost.  The target directory should be left in
+# a coherent state, and later instances of rdiff-backup should clean
+# things up so there is no sign that anything ever got aborted or
+# failed.
+#
+# Thus, files should be updated in as atomic a way as possible.  Each
+# file should be updated (and the corresponding diff files written) or
+# not, and it should be clear which happened.  In general, I don't
+# think this is possible, since the creation of the diff files and the
+# changing of updated files cannot be guaranteed to happen together.
+# It is possible, I think, to record various information to files
+# which would allow a later process to figure out what the last
+# operation was, but this would add several file operations to the
+# processing of each file, and I don't think it would be a good
+# tradeoff.
+#
+# The compromise reached here is that diff files should be created
+# just before the mirror files are updated, and each file update
+# should be done with a rename operation on a file in the same
+# directory.  Furthermore, every once in a while, rdiff-backup will
+# record which file it just finished processing.  If any fatal errors
+# are caught, it will also record the last processed file.  Future
+# instances may not know exactly when the previous instance was
+# aborted, but they will be able to narrow down the possibilities.
+
+class RobustAction:
+    """Represents a file operation to be accomplished later"""
+    def __init__(self, init_thunk, final_thunk, error_thunk):
+        """RobustAction initializer
+
+        All the thunks are functions whose return value will be
+        ignored.  init_thunk should not make any irreversible changes,
+        but only prepare for the writing of the important data.
+        final_thunk should be as short as possible and do the real
+        work.
+        error_thunk is run if there is an error in init_thunk or
+        final_thunk.  Errors in init_thunk should be corrected by
+        error_thunk as if nothing had been run in the first place.
+        The functions take no arguments except for error_thunk, which
+        receives the exception as its only argument.
+
+        """
+        self.init_thunk = init_thunk
+        self.final_thunk = final_thunk
+        self.error_thunk = error_thunk
+
+    def execute(self):
+        """Actually run the operation"""
+        try:
+            self.init_thunk()
+            self.final_thunk()
+        except Exception, exp: # Catch all errors
+            Log.exception()
+            self.error_thunk(exp)
+            raise exp
+
+
+class Robust:
+    """Contains various file operations made safer using tempfiles"""
+    null_action = RobustAction(lambda: None, lambda: None, lambda e: None)
+    def chain(robust_action_list):
+        """Return chain tying together a number of robust actions
+
+        The whole chain will be aborted if some error occurs in the
+        initialization stage of any of the component actions.
+
+        """
+        ras_with_completed_inits = []
+        def init():
+            for ra in robust_action_list:
+                ras_with_completed_inits.append(ra)
+                ra.init_thunk()
+        def final():
+            for ra in robust_action_list: ra.final_thunk()
+        def error(exp):
+            for ra in ras_with_completed_inits: ra.error_thunk(exp)
+        return RobustAction(init, final, error)
+
+    def chain_nested(robust_action_list):
+        """Like chain but final actions performed in reverse order"""
+        ras_with_completed_inits = []
+        def init():
+            for ra in robust_action_list:
+                ras_with_completed_inits.append(ra)
+                ra.init_thunk()
+        def final():
+            ralist_copy = robust_action_list[:]
+            ralist_copy.reverse()
+            for ra in ralist_copy: ra.final_thunk()
+        def error(exp):
+            for ra in ras_with_completed_inits: ra.error_thunk(exp)
+        return RobustAction(init, final, error)
+
+    def make_tf_robustaction(init_thunk, tempfiles, final_renames = None):
+        """Shortcut RobustAction creator when only tempfiles involved
+
+        Often the robust action will just consist of some initial
+        stage, renaming tempfiles in the final stage, and deleting
+        them if there is an error.  This function makes it easier to
+        create RobustActions of that type.
+
+        """
+        assert type(tempfiles) is types.TupleType, tempfiles
+        if final_renames is None: final = lambda: None
+        else:
+            assert len(tempfiles) == len(final_renames)
+            def final(): # rename tempfiles to final positions
+                for i in range(len(tempfiles)):
+                    final_name = final_renames[i]
+                    if final_name:
+                        if final_name.isdir(): # Cannot rename over directory
+                            final_name.delete()
+                        tempfiles[i].rename(final_name)
+        def error(exp):
+            for tf in tempfiles: tf.delete()
+        return RobustAction(init_thunk, final, error)
+
+    def copy_action(rorpin, rpout):
+        """Return robust action copying rorpin to rpout
+
+        The source can be a rorp or an rpath.  Does not recurse.  If
+        directories are copied, then just exit (output directory not
+        overwritten).
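+
+        Illustrative usage (a sketch only):
+
+            Robust.copy_action(rorpin, rpout).execute()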
+
+        """
+        tfl = [None] # Need mutable object that init and final can access
+        def init():
+            if not (rorpin.isdir() and rpout.isdir()): # rule out two dirs
+                tfl[0] = TempFileManager.new(rpout)
+                if rorpin.isreg(): tfl[0].write_from_fileobj(rorpin.open("rb"))
+                else: RPath.copy(rorpin, tfl[0])
+        def final():
+            if tfl[0] and tfl[0].lstat():
+                if rpout.isdir(): rpout.delete()
+                tfl[0].rename(rpout)
+        return RobustAction(init, final, lambda e: tfl[0] and tfl[0].delete())
+
+    def copy_with_attribs_action(rorpin, rpout):
+        """Like copy_action but also copy attributes"""
+        tfl = [None] # Need mutable object that init and final can access
+        def init():
+            if not (rorpin.isdir() and rpout.isdir()): # rule out two dirs
+                tfl[0] = TempFileManager.new(rpout)
+                if rorpin.isreg(): tfl[0].write_from_fileobj(rorpin.open("rb"))
+                else: RPath.copy(rorpin, tfl[0])
+                if tfl[0].lstat(): # Some files, like sockets, won't be created
+                    RPathStatic.copy_attribs(rorpin, tfl[0])
+        def final():
+            if rorpin.isdir() and rpout.isdir():
+                RPath.copy_attribs(rorpin, rpout)
+            elif tfl[0] and tfl[0].lstat():
+                if rpout.isdir(): rpout.delete()
+                tfl[0].rename(rpout)
+        return RobustAction(init, final, lambda e: tfl[0] and tfl[0].delete())
+
+    def copy_attribs_action(rorpin, rpout):
+        """Return action which just copies attributes
+
+        Copying attributes is already pretty atomic, so just run
+        normal sequence.
+
+        """
+        def final(): RPath.copy_attribs(rorpin, rpout)
+        return RobustAction(lambda: None, final, lambda e: None)
+
+    def symlink_action(rpath, linktext):
+        """Return symlink action by moving one file over another"""
+        tf = TempFileManager.new(rpath)
+        def init(): tf.symlink(linktext)
+        return Robust.make_tf_robustaction(init, (tf,), (rpath,))
+
+    def destructive_write_action(rp, s):
+        """Return action writing string s to rpath rp in robust way
+
+        This will overwrite any data currently in rp.
+
+        """
+        tf = TempFileManager.new(rp)
+        def init():
+            fp = tf.open("wb")
+            fp.write(s)
+            assert not fp.close()
+            tf.setdata()
+        return Robust.make_tf_robustaction(init, (tf,), (rp,))
+
+MakeStatic(Robust)
+
+
+class TempFileManager:
+    """Manage temp files"""
+
+    # This is a connection-specific list of temp files, to be cleaned
+    # up before rdiff-backup exits.
+    _tempfiles = []
+
+    # To make collisions less likely, this gets put in the file name
+    # and incremented whenever a new file is requested.
+    _tfindex = 0
+
+    def new(cls, rp_base, same_dir = 1):
+        """Return new tempfile that isn't in use.
+
+        If same_dir, tempfile will be in same directory as rp_base.
+        Otherwise, use tempfile module to get filename.
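+
+        Illustrative usage (a sketch only):
+
+            tf = TempFileManager.new(rp)
+            tf.write_from_fileobj(fp) # write the new data to the tempfile
+            tf.rename(rp)             # then move it into place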
+ + """ + conn = rp_base.conn + if conn is not Globals.local_connection: + return conn.TempFileManager.new(rp_base, same_dir) + + def find_unused(conn, dir): + """Find an unused tempfile with connection conn in directory dir""" + while 1: + if cls._tfindex > 100000000: + Log("Resetting index", 2) + cls._tfindex = 0 + tf = TempFile(conn, os.path.join(dir, + "rdiff-backup.tmp.%d" % cls._tfindex)) + cls._tfindex = cls._tfindex+1 + if not tf.lstat(): return tf + + if same_dir: tf = find_unused(conn, rp_base.dirsplit()[0]) + else: tf = TempFile(conn, tempfile.mktemp()) + cls._tempfiles.append(tf) + return tf + + def remove_listing(cls, tempfile): + """Remove listing of tempfile""" + if Globals.local_connection is not tempfile.conn: + tempfile.conn.TempFileManager.remove_listing(tempfile) + elif tempfile in cls._tempfiles: cls._tempfiles.remove(tempfile) + + def delete_all(cls): + """Delete all remaining tempfiles""" + for tf in cls._tempfiles[:]: tf.delete() + +MakeClass(TempFileManager) + + +class TempFile(RPath): + """Like an RPath, but keep track of which ones are still here""" + def rename(self, rp_dest): + """Rename temp file to permanent location, possibly overwriting""" + if self.isdir() and not rp_dest.isdir(): + # Cannot move a directory directly over another file + rp_dest.delete() + if (isinstance(rp_dest, DSRPath) and rp_dest.perms_delayed + and not self.hasfullperms()): + # If we are moving to a delayed perm directory, delay + # permission change on destination. + rp_dest.chmod(self.getperms()) + self.chmod(0700) + RPathStatic.rename(self, rp_dest) + TempFileManager.remove_listing(self) + + def delete(self): + RPath.delete(self) + TempFileManager.remove_listing(self) + + +class SaveState: + """Save state in the middle of backups for resuming later""" + _last_file_sym = None # RPath of sym pointing to last file processed + _last_file_definitive_rp = None # Touch this if last file is really last + _last_checkpoint_time = 0 # time in seconds of last checkpoint + _checkpoint_rp = None # RPath of checkpoint data pickle + + def init_filenames(cls, incrementing): + """Set rpaths of markers. Assume rbdir already set. + + If incrementing, then indicate increment operation, otherwise + indicate mirror. + + """ + if not Globals.isbackup_writer: + return Globals.backup_writer.SaveState.init_filenames(incrementing) + + assert Globals.local_connection is Globals.rbdir.conn, \ + Globals.rbdir.conn + if incrementing: cls._last_file_sym = Globals.rbdir.append( + "last-file-incremented.%s.snapshot" % Time.curtimestr) + else: cls._last_file_sym = Globals.rbdir.append( + "last-file-mirrored.%s.snapshot" % Time.curtimestr) + cls._checkpoint_rp = Globals.rbdir.append( + "checkpoint-data.%s.snapshot" % Time.curtimestr) + cls._last_file_definitive_rp = Globals.rbdir.append( + "last-file-definitive.%s.snapshot" % Time.curtimestr) + + def touch_last_file(cls): + """Touch last file marker, indicating backup has begun""" + cls._last_file_sym.touch() + + def touch_last_file_definitive(cls): + """Create last-file-definitive marker + + When a backup gets aborted, there may be time to indicate the + last file successfully processed, and this should be touched. + Sometimes when the abort is hard, there may be a last file + indicated, but further files since then have been processed, + in which case this shouldn't be touched. 
+
+        """
+        cls._last_file_definitive_rp.touch()
+
+    def record_last_file_action(cls, last_file_rorp):
+        """Action recording last file to be processed as symlink in rbdir
+
+        If last_file_rorp is None, then no file is known to have been
+        processed.
+
+        """
+        if last_file_rorp:
+            symtext = apply(os.path.join,
+                            ('increments',) + last_file_rorp.index)
+            return Robust.symlink_action(cls._last_file_sym, symtext)
+        else: return RobustAction(lambda: None, cls.touch_last_file,
+                                  lambda exp: None)
+
+    def checkpoint_inc_backup(cls, ITR, finalizer, last_file_rorp,
+                              override = None):
+        """Save states of tree reducer and finalizer during inc backup
+
+        If override is true, checkpoint even if one isn't due.
+
+        """
+        if not override and not cls.checkpoint_needed(): return
+        assert cls._checkpoint_rp, "_checkpoint_rp not set yet"
+
+        cls._last_checkpoint_time = time.time()
+        Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7)
+        state_string = cPickle.dumps((ITR.getstate(), finalizer.getstate()))
+        Robust.chain([Robust.destructive_write_action(cls._checkpoint_rp,
+                                                      state_string),
+                      cls.record_last_file_action(last_file_rorp)]).execute()
+
+    def checkpoint_mirror(cls, finalizer, last_file_rorp, override = None):
+        """For a mirror, only finalizer and last_file should be saved"""
+        if not override and not cls.checkpoint_needed(): return
+        if not cls._checkpoint_rp:
+            Log("Warning, _checkpoint_rp not set yet", 2)
+            return
+
+        cls._last_checkpoint_time = time.time()
+        Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7)
+        state_string = cPickle.dumps(finalizer.getstate())
+        Robust.chain([Robust.destructive_write_action(cls._checkpoint_rp,
+                                                      state_string),
+                      cls.record_last_file_action(last_file_rorp)]).execute()
+
+    def checkpoint_needed(cls):
+        """Returns true if another checkpoint is called for"""
+        return (time.time() > cls._last_checkpoint_time +
+                Globals.checkpoint_interval)
+
+    def checkpoint_remove(cls):
+        """Remove all checkpointing data after successful operation"""
+        for rp in Resume.get_relevant_rps(): rp.delete()
+
+MakeClass(SaveState)
+
+
+class Resume:
+    """Check for old aborted backups and resume if necessary"""
+    _session_info_list = None # List of ResumeSessionInfo's, sorted by time
+    def FindTime(cls, index, later_than = 0):
+        """For a given index, find the appropriate time to use for inc
+
+        If it is clear which time to use (because it is determined by
+        definitive records, or there are no aborted backups, etc.)
+        then just return the appropriate time.  Otherwise, if an
+        aborted backup was last checkpointed before the index, assume
+        that it didn't get there, and go for the older time.  If an
+        inc file is already present, the function will be rerun with a
+        later time specified.
+
+        """
+        if Time.prevtime > later_than: return Time.prevtime # usual case
+
+        for si in cls.get_sis_covering_index(index):
+            if si.time > later_than: return si.time
+        raise SkipFileException("Index %s already covered, skipping" %
+                                str(index))
+
+    def get_sis_covering_index(cls, index):
+        """Return sorted list of SessionInfos which may cover index
+
+        An aborted backup may be relevant unless index is lower and we
+        are sure that it didn't go further.
+
+        """
+        return filter(lambda session_info:
+                      not ((session_info.last_index is None or
+                            session_info.last_index < index) and
+                           session_info.last_definitive),
+                      cls._session_info_list)
+
+    def SetSessionInfo(cls):
+        """Read data directory and initialize _session_info"""
+        silist = []
+        rp_quad_dict = cls.group_rps_by_time(cls.get_relevant_rps())
+        times = rp_quad_dict.keys()
+        times.sort()
+        for time in times:
+            silist.append(cls.quad_to_si(time, rp_quad_dict[time]))
+        cls._session_info_list = silist
+
+    def get_relevant_rps(cls):
+        """Return list of relevant rpaths in rbdata directory"""
+        relevant_bases = ['last-file-incremented', 'last-file-mirrored',
+                          'checkpoint-data', 'last-file-definitive']
+        rps = map(Globals.rbdir.append, Globals.rbdir.listdir())
+        return filter(lambda rp: rp.isincfile()
+                      and rp.getincbase_str() in relevant_bases, rps)
+
+    def group_rps_by_time(cls, rplist):
+        """Take list of rps, return time dict {time: quadlist}
+
+        Times in seconds are the keys, values are quadruples of
+        [last-file-incremented, last-file-mirrored, checkpoint-data,
+        last-file-definitive].
+
+        """
+        result = {}
+        for rp in rplist:
+            time = Time.stringtotime(rp.getinctime())
+            if result.has_key(time): quadlist = result[time]
+            else: quadlist = [None, None, None, None]
+            base_string = rp.getincbase_str()
+            if base_string == 'last-file-incremented': quadlist[0] = rp
+            elif base_string == 'last-file-mirrored': quadlist[1] = rp
+            elif base_string == 'last-file-definitive': quadlist[3] = 1
+            else:
+                assert base_string == 'checkpoint-data'
+                quadlist[2] = rp
+            result[time] = quadlist
+        return result
+
+    def quad_to_si(cls, time, quad):
+        """Take time, quadlist, return associated ResumeSessionInfo"""
+        increment_sym, mirror_sym, checkpoint_rp, last_definitive = quad
+        assert not (increment_sym and mirror_sym) # both shouldn't exist
+        ITR, finalizer = None, None
+        if increment_sym:
+            mirror = None
+            last_index = cls.sym_to_index(increment_sym)
+            if checkpoint_rp:
+                ITR, finalizer = cls.unpickle_checkpoint(checkpoint_rp)
+        elif mirror_sym:
+            mirror = 1
+            last_index = cls.sym_to_index(mirror_sym)
+            if checkpoint_rp:
+                finalizer = cls.unpickle_checkpoint(checkpoint_rp)
+        return ResumeSessionInfo(mirror, time, last_index, last_definitive,
+                                 finalizer, ITR)
+
+    def sym_to_index(cls, sym_rp):
+        """Read last file sym rp, return last file index
+
+        If sym_rp is not a sym at all, return None, indicating that no
+        file index was ever conclusively processed.
+
+        """
+        if not sym_rp.issym(): return None
+        link_components = sym_rp.readlink().split("/")
+        assert link_components[0] == 'increments'
+        return tuple(link_components[1:])
+
+    def unpickle_checkpoint(cls, checkpoint_rp):
+        """Read data from checkpoint_rp and return unpickled data
+
+        The return value is the finalizer state for a mirror
+        checkpoint, and a pair (increment ITR state, finalizer state)
+        for an increment checkpoint.
+
+        """
+        fp = checkpoint_rp.open("rb")
+        data = fp.read()
+        fp.close()
+        return cPickle.loads(data)
+
+    def ResumeCheck(cls):
+        """Return relevant ResumeSessionInfo if there's one we should resume
+
+        Also, if we find an RSI to resume, reset the current time to
+        the old resume time.
+
+        """
+        cls.SetSessionInfo()
+        if not cls._session_info_list:
+            if Globals.resume == 1:
+                Log.FatalError("User specified resume, but no data on "
+                               "previous backup found.")
+            else: return None
+        else:
+            si = cls._session_info_list[-1]
+            if (Globals.resume == 1 or
+                (time.time() <= (si.time + Globals.resume_window) and
+                 not Globals.resume == 0)):
+                Log("Resuming aborted backup dated %s" %
+                    Time.timetopretty(si.time), 2)
+                Time.setcurtime(si.time)
+                return si
+            else:
+                Log("Last backup dated %s was aborted, but we aren't "
+                    "resuming it." % Time.timetopretty(si.time), 2)
+                return None
+        assert 0
+
+MakeClass(Resume)
+
+
+class ResumeSessionInfo:
+    """Hold information about a previously aborted session"""
+    def __init__(self, mirror, time, last_index,
+                 last_definitive, finalizer_state = None, ITR_state = None):
+        """Class initializer
+
+        time - starting time in seconds of backup
+        mirror - true if backup was a mirror, false if increment
+        last_index - last confirmed index processed by backup, or None
+        last_definitive - true if we know last_index is really last
+        finalizer_state - finalizer reducer state if available
+        ITR_state - for increments, ITR reducer state (assume mirror if None)
+
+        """
+        self.time = time
+        self.mirror = mirror
+        self.last_index = last_index
+        self.last_definitive = last_definitive
+        self.ITR_state, self.finalizer_state = ITR_state, finalizer_state
diff --git a/rdiff-backup/rdiff_backup/rorpiter.py b/rdiff-backup/rdiff_backup/rorpiter.py
new file mode 100644
index 0000000..5740ef8
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/rorpiter.py
@@ -0,0 +1,248 @@
+from __future__ import generators
+execfile("robust.py")
+import tempfile
+
+#######################################################################
+#
+# rorpiter - Operations on Iterators of Read Only Remote Paths
+#
+
+class RORPIterException(Exception): pass
+
+class RORPIter:
+    """Functions relating to iterators of Read Only RPaths
+
+    The main structure will be an iterator that yields RORPaths.
+    Every RORPath has a "raw" form that makes it more amenable to
+    being turned into a file.  The raw form of the iterator yields
+    each RORPath in the form of the tuple (index, data_dictionary,
+    files), where files is the number of files attached (usually 1 or
+    0).  After that, if a file is attached, it yields that file.
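+
+    Illustrative raw stream (made-up values):
+
+        (("usr", "bin"), {...}, 1), <attached file object>,
+        (("usr", "lib"), {...}, 0), ...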
+
+    """
+    def ToRaw(rorp_iter):
+        """Convert a rorp iterator to raw form"""
+        for rorp in rorp_iter:
+            if rorp.file:
+                yield (rorp.index, rorp.data, 1)
+                yield rorp.file
+            else: yield (rorp.index, rorp.data, 0)
+
+    def FromRaw(raw_iter):
+        """Convert raw rorp iter back to standard form"""
+        for index, data, num_files in raw_iter:
+            rorp = RORPath(index, data)
+            if num_files:
+                assert num_files == 1, "Only one file accepted right now"
+                rorp.setfile(RORPIter.getnext(raw_iter))
+            yield rorp
+
+    def ToFile(rorp_iter):
+        """Return file version of iterator"""
+        return FileWrappingIter(RORPIter.ToRaw(rorp_iter))
+
+    def FromFile(fileobj):
+        """Recover rorp iterator from file interface"""
+        return RORPIter.FromRaw(IterWrappingFile(fileobj))
+
+    def IterateRPaths(base_rp):
+        """Return an iterator yielding RPaths with given base rp"""
+        yield base_rp
+        if base_rp.isdir():
+            dirlisting = base_rp.listdir()
+            dirlisting.sort()
+            for filename in dirlisting:
+                for rp in RORPIter.IterateRPaths(base_rp.append(filename)):
+                    yield rp
+
+    def Signatures(rp_iter):
+        """Yield signatures of rpaths in given rp_iter"""
+        for rp in rp_iter:
+            if rp.isplaceholder(): yield rp
+            else:
+                rorp = rp.getRORPath()
+                if rp.isreg(): rorp.setfile(Rdiff.get_signature(rp))
+                yield rorp
+
+    def GetSignatureIter(base_rp):
+        """Return a signature iterator recursing over the base_rp"""
+        return RORPIter.Signatures(RORPIter.IterateRPaths(base_rp))
+
+    def CollateIterators(*rorp_iters):
+        """Collate RORPath iterators by index
+
+        So it takes two or more iterators of rorps and returns an
+        iterator yielding tuples like (rorp1, rorp2) with the same
+        index.  If one or the other lacks that index, None takes its
+        place.
+
+        """
+        # overflow[i] means that iter[i] has been exhausted
+        # rorps[i] being None means that it is time to replenish it
+        iter_num = len(rorp_iters)
+        if iter_num == 2:
+            return RORPIter.Collate2Iters(rorp_iters[0], rorp_iters[1])
+        overflow = [None] * iter_num
+        rorps = overflow[:]
+
+        def setrorps(overflow, rorps):
+            """Set the overflow and rorps list"""
+            for i in range(iter_num):
+                if not overflow[i] and rorps[i] is None:
+                    try: rorps[i] = rorp_iters[i].next()
+                    except StopIteration:
+                        overflow[i] = 1
+                        rorps[i] = None
+
+        def getleastindex(rorps):
+            """Return the first index in rorps, assuming rorps isn't empty"""
+            return min(map(lambda rorp: rorp.index,
+                           filter(lambda x: x, rorps)))
+
+        def yield_tuples(iter_num, overflow, rorps):
+            while 1:
+                setrorps(overflow, rorps)
+                if not None in overflow: break
+
+                index = getleastindex(rorps)
+                yieldval = []
+                for i in range(iter_num):
+                    if rorps[i] and rorps[i].index == index:
+                        yieldval.append(rorps[i])
+                        rorps[i] = None
+                    else: yieldval.append(None)
+                yield IndexedTuple(index, yieldval)
+        return yield_tuples(iter_num, overflow, rorps)
+
+    def Collate2Iters(riter1, riter2):
+        """Special case of CollateIterators with 2 arguments
+
+        This does the same thing but is faster because it doesn't have
+        to consider the >2 iterator case.  Profiler says speed is
+        important here.
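+
+        Illustrative collation (made-up indices): pairing elements
+        with indices 1,2 against elements with indices 2,3 yields
+
+            (elem1, None), (elem2a, elem2b), (None, elem3)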
+
+        """
+        relem1, relem2 = None, None
+        while 1:
+            if not relem1:
+                try: relem1 = riter1.next()
+                except StopIteration:
+                    if relem2: yield IndexedTuple(index2, (None, relem2))
+                    for relem2 in riter2:
+                        yield IndexedTuple(relem2.index, (None, relem2))
+                    break
+                index1 = relem1.index
+            if not relem2:
+                try: relem2 = riter2.next()
+                except StopIteration:
+                    if relem1: yield IndexedTuple(index1, (relem1, None))
+                    for relem1 in riter1:
+                        yield IndexedTuple(relem1.index, (relem1, None))
+                    break
+                index2 = relem2.index
+
+            if index1 < index2:
+                yield IndexedTuple(index1, (relem1, None))
+                relem1 = None
+            elif index1 == index2:
+                yield IndexedTuple(index1, (relem1, relem2))
+                relem1, relem2 = None, None
+            else: # index2 is less
+                yield IndexedTuple(index2, (None, relem2))
+                relem2 = None
+
+    def getnext(iter):
+        """Return the next element of an iterator, raising error if none"""
+        try: next = iter.next()
+        except StopIteration: raise RORPIterException("Unexpected end to iter")
+        return next
+
+    def GetDiffIter(sig_iter, new_iter):
+        """Return delta iterator from sig_iter to new_iter
+
+        The accompanying file for each will be a delta as produced by
+        rdiff, unless the destination file does not exist, in which
+        case it will be the file in its entirety.
+
+        sig_iter may be composed of rorps, but new_iter should have
+        full RPaths.
+
+        """
+        collated_iter = RORPIter.CollateIterators(sig_iter, new_iter)
+        for rorp, rp in collated_iter: yield RORPIter.diffonce(rorp, rp)
+
+    def diffonce(sig_rorp, new_rp):
+        """Return one diff rorp, based from signature rorp and orig rp"""
+        if sig_rorp and sig_rorp.isreg() and new_rp and new_rp.isreg():
+            diff_rorp = new_rp.getRORPath()
+            diff_rorp.setfile(Rdiff.get_delta_sigfileobj(sig_rorp.open("rb"),
+                                                         new_rp))
+            diff_rorp.set_attached_filetype('diff')
+            return diff_rorp
+        else:
+            # Just send over original if diff isn't appropriate
+            if sig_rorp: sig_rorp.close_if_necessary()
+            if not new_rp: return RORPath(sig_rorp.index)
+            elif new_rp.isreg():
+                diff_rorp = new_rp.getRORPath(1)
+                diff_rorp.set_attached_filetype('snapshot')
+                return diff_rorp
+            else: return new_rp.getRORPath()
+
+    def PatchIter(base_rp, diff_iter):
+        """Patch the appropriate rps in basis_iter using diff_iter"""
+        basis_iter = RORPIter.IterateRPaths(base_rp)
+        collated_iter = RORPIter.CollateIterators(basis_iter, diff_iter)
+        for basisrp, diff_rorp in collated_iter:
+            RORPIter.patchonce_action(base_rp, basisrp, diff_rorp).execute()
+
+    def patchonce_action(base_rp, basisrp, diff_rorp):
+        """Return action patching basisrp using diff_rorp"""
+        assert diff_rorp, "Missing diff index %s" % basisrp.index
+        if not diff_rorp.lstat():
+            return RobustAction(lambda: None, basisrp.delete, lambda e: None)
+
+        if basisrp and basisrp.isreg() and diff_rorp.isreg():
+            assert diff_rorp.get_attached_filetype() == 'diff'
+            return Rdiff.patch_with_attribs_action(basisrp, diff_rorp)
+        else: # Diff contains whole file, just copy it over
+            if not basisrp: basisrp = base_rp.new_index(diff_rorp.index)
+            return Robust.copy_with_attribs_action(diff_rorp, basisrp)
+
+MakeStatic(RORPIter)
+
+
+
+class IndexedTuple:
+    """Like a tuple, but has .index
+
+    This is used by CollateIterators above, and can be passed to the
+    IterTreeReducer.
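+
+    Illustrative usage (a sketch only):
+
+        it = IndexedTuple((1, 2), ("a", "b"))
+        it.index   =>  (1, 2)
+        it[0]      =>  "a"
+        len(it)    =>  2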
+ + """ + def __init__(self, index, sequence): + self.index = index + self.data = tuple(sequence) + + def __len__(self): return len(self.data) + + def __getitem__(self, key): + """This only works for numerical keys (faster that way)""" + return self.data[key] + + def __cmp__(self, other): + assert isinstance(other, IndexedTuple) + if self.index < other.index: return -1 + elif self.index == other.index: return 0 + else: return 1 + + def __eq__(self, other): + if isinstance(other, IndexedTuple): + return self.index == other.index and self.data == other.data + elif type(other) is types.TupleType: + return self.data == other + else: return None + + def __str__(self): + assert len(self.data) == 2 + return "(%s, %s).%s" % (str(self.data[0]), str(self.data[1]), + str(self.index)) diff --git a/rdiff-backup/rdiff_backup/rpath.py b/rdiff-backup/rdiff_backup/rpath.py new file mode 100644 index 0000000..4e6cc8f --- /dev/null +++ b/rdiff-backup/rdiff_backup/rpath.py @@ -0,0 +1,704 @@ +execfile("connection.py") +import os, stat, re, sys, shutil + +####################################################################### +# +# rpath - Wrapper class around a real path like "/usr/bin/env" +# +# The RPath and associated classes make some function calls more +# convenient (e.g. RPath.getperms()) and also make working with files +# on remote systems transparent. +# + +class RPathException(Exception): pass + +class RPathStatic: + """Contains static methods for use with RPaths""" + def copyfileobj(inputfp, outputfp): + """Copies file inputfp to outputfp in blocksize intervals""" + blocksize = Globals.blocksize + while 1: + inbuf = inputfp.read(blocksize) + if not inbuf: break + outputfp.write(inbuf) + + def cmpfileobj(fp1, fp2): + """True if file objects fp1 and fp2 contain same data""" + blocksize = Globals.blocksize + while 1: + buf1 = fp1.read(blocksize) + buf2 = fp2.read(blocksize) + if buf1 != buf2: return None + elif not buf1: return 1 + + def check_for_files(*rps): + """Make sure that all the rps exist, raise error if not""" + for rp in rps: + if not rp.lstat(): + raise RPathException("File %s does not exist" % rp.path) + + def move(rpin, rpout): + """Move rpin to rpout, renaming if possible""" + try: RPath.rename(rpin, rpout) + except os.error: + RPath.copy(rpin, rpout) + rpin.delete() + + def copy(rpin, rpout): + """Copy RPath rpin to rpout. 
Works for symlinks, dirs, etc.""" + Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6) + if not rpin.lstat(): + raise RPathException, ("File %s does not exist" % rpin.index) + + if rpout.lstat(): + if rpin.isreg() or not RPath.cmp(rpin, rpout): + rpout.delete() # easier to write that compare + else: return + + if rpin.isreg(): RPath.copy_reg_file(rpin, rpout) + elif rpin.isdir(): rpout.mkdir() + elif rpin.issym(): rpout.symlink(rpin.readlink()) + elif rpin.ischardev(): + major, minor = rpin.getdevnums() + rpout.makedev("c", major, minor) + elif rpin.isblkdev(): + major, minor = rpin.getdevnums() + rpout.makedev("b", major, minor) + elif rpin.isfifo(): rpout.mkfifo() + elif rpin.issock(): Log("Found socket, ignoring", 1) + else: raise RPathException("File %s has unknown type" % rpin.path) + + def copy_reg_file(rpin, rpout): + """Copy regular file rpin to rpout, possibly avoiding connection""" + try: + if rpout.conn is rpin.conn: + rpout.conn.shutil.copyfile(rpin.path, rpout.path) + rpout.data = {'type': rpin.data['type']} + return + except AttributeError: pass + rpout.write_from_fileobj(rpin.open("rb")) + + def cmp(rpin, rpout): + """True if rpin has the same data as rpout + + cmp does not compare file ownership, permissions, or times, or + examine the contents of a directory. + + """ + RPath.check_for_files(rpin, rpout) + if rpin.isreg(): + if not rpout.isreg(): return None + fp1, fp2 = rpin.open("rb"), rpout.open("rb") + result = RPathStatic.cmpfileobj(fp1, fp2) + if fp1.close() or fp2.close(): + raise RPathException("Error closing file") + return result + elif rpin.isdir(): return rpout.isdir() + elif rpin.issym(): + return rpout.issym() and (rpin.readlink() == rpout.readlink()) + elif rpin.ischardev(): + return rpout.ischardev() and \ + (rpin.getdevnums() == rpout.getdevnums()) + elif rpin.isblkdev(): + return rpout.isblkdev() and \ + (rpin.getdevnums() == rpout.getdevnums()) + elif rpin.isfifo(): return rpout.isfifo() + elif rpin.issock(): return rpout.issock() + else: raise RPathException("File %s has unknown type" % rpin.path) + + def copy_attribs(rpin, rpout): + """Change file attributes of rpout to match rpin + + Only changes the chmoddable bits, uid/gid ownership, and + timestamps, so both must already exist. + + """ + Log("Copying attributes from %s to %s" % (rpin.index, rpout.path), 7) + RPath.check_for_files(rpin, rpout) + if rpin.issym(): return # symlinks have no valid attributes + if Globals.change_ownership: apply(rpout.chown, rpin.getuidgid()) + rpout.chmod(rpin.getperms()) + if not rpin.isdev(): rpout.setmtime(rpin.getmtime()) + + def cmp_attribs(rp1, rp2): + """True if rp1 has the same file attributes as rp2 + + Does not compare file access times. If not changing + ownership, do not check user/group id. 
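+
+ In other words (a summary of the checks below, not extra
+ ones): permission bits always count; uid/gid count only when
+ Globals.change_ownership is set; and mtimes count only for
+ types whose mtimes are meaningful, so not for symlinks or
+ device files.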
+ + """ + RPath.check_for_files(rp1, rp2) + if Globals.change_ownership and rp1.getuidgid() != rp2.getuidgid(): + result = None + elif rp1.getperms() != rp2.getperms(): result = None + elif rp1.issym() and rp2.issym(): # Don't check times for some types + result = 1 + elif rp1.isblkdev() and rp2.isblkdev(): result = 1 + elif rp1.ischardev() and rp2.ischardev(): result = 1 + else: result = (rp1.getmtime() == rp2.getmtime()) + Log("Compare attribs %s and %s: %s" % (rp1.path, rp2.path, result), 7) + return result + + def copy_with_attribs(rpin, rpout): + """Copy file and then copy over attributes""" + RPath.copy(rpin, rpout) + RPath.copy_attribs(rpin, rpout) + + def quick_cmp_with_attribs(rp1, rp2): + """Quicker version of cmp_with_attribs + + Instead of reading all of each file, assume that regular files + are the same if the attributes compare. + + """ + if not RPath.cmp_attribs(rp1, rp2): return None + if rp1.isreg() and rp2.isreg() and (rp1.getlen() == rp2.getlen()): + return 1 + return RPath.cmp(rp1, rp2) + + def cmp_with_attribs(rp1, rp2): + """Combine cmp and cmp_attribs""" + return RPath.cmp_attribs(rp1, rp2) and RPath.cmp(rp1, rp2) + + def rename(rp_source, rp_dest): + """Rename rp_source to rp_dest""" + assert rp_source.conn is rp_dest.conn + Log("Renaming %s to %s" % (rp_source.path, rp_dest.path), 7) + rp_source.conn.os.rename(rp_source.path, rp_dest.path) + rp_dest.data = rp_source.data + rp_source.data = {'type': None} + + def tupled_lstat(filename): + """Like os.lstat, but return only a tuple, or None if os.error + + Later versions of os.lstat return a special lstat object, + which can confuse the pickler and cause errors in remote + operations. + + """ + try: return tuple(os.lstat(filename)) + except os.error: return None + + def cmp_recursive(rp1, rp2): + """True if rp1 and rp2 are at the base of same directories + + Includes only attributes, no file data. This function may not + be used in rdiff-backup but it comes in handy in the unit + tests. + + """ + rp1.setdata() + rp2.setdata() + dsiter1, dsiter2 = map(DestructiveStepping.Iterate_with_Finalizer, + [rp1, rp2], [1, None]) + result = Iter.equal(dsiter1, dsiter2, 1) + for i in dsiter1: pass # make sure all files processed anyway + for i in dsiter2: pass + return result + +MakeStatic(RPathStatic) + + +class RORPath(RPathStatic): + """Read Only RPath - carry information about a path + + These contain information about a file, and possible the file's + data, but do not have a connection and cannot be written to or + changed. The advantage of these objects is that they can be + communicated by encoding their index and data dictionary. 
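+
+ For instance (hypothetical values), a regular file might carry
+ index ("usr", "bin", "ls") and data
+
+ {'type': 'reg', 'size': 68532, 'perms': 0755,
+ 'uid': 0, 'gid': 0, 'mtime': 1016694163L}
+
+ which is all the receiving end needs to reconstruct it.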
+ + """ + def __init__(self, index, data = None): + self.index = index + if data: self.data = data + else: self.data = {'type':None} # signify empty file + self.file = None + + def __eq__(self, other): + """Signal two files equivalent""" + if not Globals.change_ownership or self.issym() and other.issym(): + # Don't take file ownership into account when comparing + data1, data2 = self.data.copy(), other.data.copy() + for d in (data1, data2): + for key in ('uid', 'gid'): + if d.has_key(key): del d[key] + return self.index == other.index and data1 == data2 + else: return self.index == other.index and self.data == other.data + + def __str__(self): + """Pretty print file statistics""" + return "Index: %s\nData: %s" % (self.index, self.data) + + def __getstate__(self): + """Return picklable state + + This is necessary in case the RORPath is carrying around a + file object, which can't/shouldn't be pickled. + + """ + return (self.index, self.data) + + def __setstate__(self, rorp_state): + """Reproduce RORPath from __getstate__ output""" + self.index, self.data = rorp_state + + def make_placeholder(self): + """Make rorp into a placeholder + + This object doesn't contain any information about the file, + but, when passed along, may show where the previous stages are + in their processing. It is the RORPath equivalent of fiber. + + """ + self.data = {'placeholder': + ("It is actually good for placeholders to use" + "up a bit of memory, so the buffers get flushed" + "more often when placeholders move through." + "See the get_dissimilar docs for more info.")} + + def isplaceholder(self): + """True if the object is a placeholder""" + return self.data.has_key('placeholder') + + def lstat(self): + """Returns type of file + + The allowable types are None if the file doesn't exist, 'reg' + for a regular file, 'dir' for a directory, 'dev' for a device + file, 'fifo' for a fifo, 'sock' for a socket, and 'sym' for a + symlink. 
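+
+ Note that a file which does not exist is represented by the
+ data dictionary {'type': None}, so lstat() doubles as an
+ existence test; "if not rp.lstat(): ..." is the usual idiom.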
+ + """ + return self.data['type'] + gettype = lstat + + def isdir(self): + """True if self is a dir""" + return self.data['type'] == 'dir' + + def isreg(self): + """True if self is a regular file""" + return self.data['type'] == 'reg' + + def issym(self): + """True if path is of a symlink""" + return self.data['type'] == 'sym' + + def isfifo(self): + """True if path is a fifo""" + return self.data['type'] == 'fifo' + + def ischardev(self): + """True if path is a character device file""" + return self.data['type'] == 'dev' and self.data['devnums'][0] == 'c' + + def isblkdev(self): + """True if path is a block device file""" + return self.data['type'] == 'dev' and self.data['devnums'][0] == 'b' + + def isdev(self): + """True if path is a device file""" + return self.data['type'] == 'dev' + + def issock(self): + """True if path is a socket""" + return self.data['type'] == 'sock' + + def getperms(self): + """Return permission block of file""" + return self.data['perms'] + + def getsize(self): + """Return length of file in bytes""" + return self.data['size'] + + def getuidgid(self): + """Return userid/groupid of file""" + return self.data['uid'], self.data['gid'] + + def getatime(self): + """Return access time in seconds""" + return self.data['atime'] + + def getmtime(self): + """Return modification time in seconds""" + return self.data['mtime'] + + def readlink(self): + """Wrapper around os.readlink()""" + return self.data['linkname'] + + def getdevnums(self): + """Return a devices major/minor numbers from dictionary""" + return self.data['devnums'][1:] + + def setfile(self, file): + """Right now just set self.file to be the already opened file""" + assert file and not self.file + def closing_hook(): self.file_already_open = None + self.file = RPathFileHook(file, closing_hook) + self.file_already_open = None + + def get_attached_filetype(self): + """If there is a file attached, say what it is + + Currently the choices are 'snapshot' meaning an exact copy of + something, and 'diff' for an rdiff style diff. + + """ + return self.data['filetype'] + + def set_attached_filetype(self, type): + """Set the type of the attached file""" + self.data['filetype'] = type + + def open(self, mode): + """Return file type object if any was given using self.setfile""" + if mode != "rb": raise RPathException("Bad mode %s" % mode) + if self.file_already_open: + raise RPathException("Attempt to open same file twice") + self.file_already_open = 1 + return self.file + + def close_if_necessary(self): + """If file is present, discard data and close""" + if self.file: + while self.file.read(Globals.blocksize): pass + assert not self.file.close(), \ + "Error closing file\ndata = %s\nindex = %s\n" % (self.data, + self.index) + self.file_already_open = None + + +class RPath(RORPath): + """Remote Path class - wrapper around a possibly non-local pathname + + This class contains a dictionary called "data" which should + contain all the information about the file sufficient for + identification (i.e. if two files have the the same (==) data + dictionary, they are the same file). + + """ + regex_chars_to_quote = re.compile("[\\\\\\\"\\$`]") + + def __init__(self, connection, base, index = (), data = None): + """RPath constructor + + connection = self.conn is the Connection the RPath will use to + make system calls, and index is the name of the rpath used for + comparison, and should be a tuple consisting of the parts of + the rpath after the base split up. 
For instance ("foo", + "bar") for "foo/bar" (no base), and ("local", "bin") for + "/usr/local/bin" if the base is "/usr". + + """ + self.conn = connection + self.index = index + self.base = base + self.path = apply(os.path.join, (base,) + self.index) + self.file = None + if data: self.data = data + else: self.setdata() + + def __str__(self): + return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index, + self.data) + + def __getstate__(self): + """Return picklable state + + The connection must be local because we can't pickle a + connection. Data and any attached file also won't be saved. + + """ + assert self.conn is Globals.local_connection + return (self.index, self.base, self.data) + + def __setstate__(self, rpath_state): + """Reproduce RPath from __getstate__ output""" + self.index, self.base, self.data = rpath_state + + def setdata(self): + """Create the data dictionary""" + statblock = self.conn.RPathStatic.tupled_lstat(self.path) + if statblock is None: + self.data = {'type':None} + return + data = {} + mode = statblock[stat.ST_MODE] + + if stat.S_ISREG(mode): + type = 'reg' + data['size'] = statblock[stat.ST_SIZE] + elif stat.S_ISDIR(mode): type = 'dir' + elif stat.S_ISCHR(mode): + type = 'dev' + data['devnums'] = ('c',) + self._getdevnums() + elif stat.S_ISBLK(mode): + type = 'dev' + data['devnums'] = ('b',) + self._getdevnums() + elif stat.S_ISFIFO(mode): type = 'fifo' + elif stat.S_ISLNK(mode): + type = 'sym' + data['linkname'] = self.conn.os.readlink(self.path) + elif stat.S_ISSOCK(mode): type = 'sock' + else: raise RPathException("Unknown type for %s" % self.path) + data['type'] = type + data['perms'] = stat.S_IMODE(mode) + data['uid'] = statblock[stat.ST_UID] + data['gid'] = statblock[stat.ST_GID] + + if not (type == 'sym' or type == 'dev'): + # mtimes on symlinks and dev files don't work consistently + data['mtime'] = long(statblock[stat.ST_MTIME]) + + if Globals.preserve_atime and not type == 'sym': + data['atime'] = long(statblock[stat.ST_ATIME]) + self.data = data + + def check_consistency(self): + """Raise an error if consistency of rp broken + + This is useful for debugging when the cache and disk get out + of sync and you need to find out where it happened. 
+ + """ + temptype = self.data['type'] + self.setdata() + assert temptype == self.data['type'], \ + "\nName: %s\nOld: %s --> New: %s\n" % \ + (self.path, temptype, self.data['type']) + + def _getdevnums(self): + """Return tuple for special file (major, minor)""" + assert self.conn is Globals.local_connection + if Globals.exclude_device_files: + # No point in finding numbers because it will be excluded anyway + return () + s = os.lstat(self.path).st_rdev + return (s >> 8, s & 0xff) + + def chmod(self, permissions): + """Wrapper around os.chmod""" + self.conn.os.chmod(self.path, permissions) + self.data['perms'] = permissions + + def settime(self, accesstime, modtime): + """Change file modification times""" + Log("Setting time of %s to %d" % (self.path, modtime), 7) + self.conn.os.utime(self.path, (accesstime, modtime)) + self.data['atime'] = accesstime + self.data['mtime'] = modtime + + def setmtime(self, modtime): + """Set only modtime (access time to present)""" + Log("Setting time of %s to %d" % (self.path, modtime), 7) + self.conn.os.utime(self.path, (time.time(), modtime)) + self.data['mtime'] = modtime + + def chown(self, uid, gid): + """Set file's uid and gid""" + self.conn.os.chown(self.path, uid, gid) + self.data['uid'] = uid + self.data['gid'] = gid + + def mkdir(self): + Log("Making directory " + self.path, 6) + self.conn.os.mkdir(self.path) + self.setdata() + + def rmdir(self): + Log("Removing directory " + self.path, 6) + self.conn.os.rmdir(self.path) + self.data = {'type': None} + + def listdir(self): + """Return list of string paths returned by os.listdir""" + return self.conn.os.listdir(self.path) + + def symlink(self, linktext): + """Make symlink at self.path pointing to linktext""" + self.conn.os.symlink(linktext, self.path) + self.setdata() + assert self.issym() + + def mkfifo(self): + """Make a fifo at self.path""" + self.conn.os.mkfifo(self.path) + self.setdata() + assert self.isfifo() + + def touch(self): + """Make sure file at self.path exists""" + Log("Touching " + self.path, 7) + self.conn.open(self.path, "w").close() + self.setdata() + assert self.isreg() + + def hasfullperms(self): + """Return true if current process has full permissions on the file""" + if self.isowner(): return self.getperms() % 01000 >= 0700 + elif self.isgroup(): return self.getperms() % 0100 >= 070 + else: return self.getperms() % 010 >= 07 + + def readable(self): + """Return true if current process has read permissions on the file""" + if self.isowner(): return self.getperms() % 01000 >= 0400 + elif self.isgroup(): return self.getperms() % 0100 >= 040 + else: return self.getperms() % 010 >= 04 + + def executable(self): + """Return true if current process has execute permissions""" + if self.isowner(): return self.getperms() % 0200 >= 0100 + elif self.isgroup(): return self.getperms() % 020 >= 010 + else: return self.getperms() % 02 >= 01 + + def isowner(self): + """Return true if current process is owner of rp or root""" + uid = self.conn.Globals.get('process_uid') + return uid == 0 or uid == self.data['uid'] + + def isgroup(self): + """Return true if current process is in group of rp""" + return self.conn.Globals.get('process_gid') == self.data['gid'] + + def delete(self): + """Delete file at self.path + + The destructive stepping allows this function to delete + directories even if they have files and we lack permissions. 
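+
+ Roughly, per the body below: a directory is traversed with
+ DestructiveStepping.Iterate_from, which can bypass missing
+ permissions, and an IterTreeReducer removes the contents of
+ each directory before rmdir is called on the directory itself.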
+ + """ + Log("Deleting %s" % self.path, 7) + self.setdata() + if not self.lstat(): return # must have been deleted in meantime + elif self.isdir(): + def helper(dsrp, base_init_output, branch_reduction): + if dsrp.isdir(): dsrp.rmdir() + else: dsrp.delete() + dsiter = DestructiveStepping.Iterate_from(self, None) + itm = IterTreeReducer(lambda x: None, lambda x,y: None, None, + helper) + for dsrp in dsiter: itm(dsrp) + itm.getresult() + else: self.conn.os.unlink(self.path) + self.setdata() + + def quote(self): + """Return quoted self.path for use with os.system()""" + return '"%s"' % self.regex_chars_to_quote.sub( + lambda m: "\\"+m.group(0), self.path) + + def normalize(self): + """Return RPath canonical version of self.path + + This just means that redundant /'s will be removed, including + the trailing one, even for directories. ".." components will + be retained. + + """ + newpath = "/".join(filter(lambda x: x and x != ".", + self.path.split("/"))) + if self.path[0] == "/": newpath = "/" + newpath + elif not newpath: newpath = "." + return self.__class__(self.conn, newpath, ()) + + def dirsplit(self): + """Returns a tuple of strings (dirname, basename) + + Basename is never '' unless self is root, so it is unlike + os.path.basename. If path is just above root (so dirname is + root), then dirname is ''. In all other cases dirname is not + the empty string. Also, dirsplit depends on the format of + self, so basename could be ".." and dirname could be a + subdirectory. For an atomic relative path, dirname will be + '.'. + + """ + normed = self.normalize() + if normed.path.find("/") == -1: return (".", normed.path) + comps = normed.path.split("/") + return "/".join(comps[:-1]), comps[-1] + + def append(self, ext): + """Return new RPath with same connection by adjoing ext""" + return self.__class__(self.conn, self.base, self.index + (ext,)) + + def new_index(self, index): + """Return similar RPath but with new index""" + return self.__class__(self.conn, self.base, index) + + def open(self, mode): + """Return open file. Supports modes "w" and "r".""" + return self.conn.open(self.path, mode) + + def write_from_fileobj(self, fp): + """Reads fp and writes to self.path. 
Closes both when done"""
+ Log("Writing file object to " + self.path, 7)
+ assert not self.lstat(), "File %s already exists" % self.path
+ outfp = self.open("wb")
+ RPath.copyfileobj(fp, outfp)
+ if fp.close() or outfp.close():
+ raise RPathException("Error closing file")
+ self.setdata()
+
+ def isincfile(self):
+ """Return true if path looks like an increment file"""
+ dotsplit = self.path.split(".")
+ if len(dotsplit) < 3: return None
+ timestring, ext = dotsplit[-2:]
+ if Time.stringtotime(timestring) is None: return None
+ return (ext == "snapshot" or ext == "dir" or
+ ext == "missing" or ext == "diff")
+
+ def getinctype(self):
+ """Return type of an increment file"""
+ return self.path.split(".")[-1]
+
+ def getinctime(self):
+ """Return timestring of an increment file"""
+ return self.path.split(".")[-2]
+
+ def getincbase(self):
+ """Return the base filename of an increment file in rp form"""
+ if self.index:
+ return self.__class__(self.conn, self.base, self.index[:-1] +
+ ((".".join(self.index[-1].split(".")[:-2])),))
+ else: return self.__class__(self.conn,
+ ".".join(self.base.split(".")[:-2]), ())
+
+ def getincbase_str(self):
+ """Return the base filename string of an increment file"""
+ return self.getincbase().dirsplit()[1]
+
+ def makedev(self, type, major, minor):
+ """Make a special file with specified type, and major/minor nums"""
+ cmdlist = ['mknod', self.path, type, str(major), str(minor)]
+ if self.conn.os.spawnvp(os.P_WAIT, 'mknod', cmdlist) != 0:
+ raise RPathException("Error running %s" % cmdlist)
+ if type == 'c': datatype = 'chr'
+ elif type == 'b': datatype = 'blk'
+ else: raise RPathException
+ self.data = {'type': datatype, 'devnums': (type, major, minor)}
+
+ def getRORPath(self, include_contents = None):
+ """Return read only version of self"""
+ rorp = RORPath(self.index, self.data)
+ if include_contents: rorp.setfile(self.open("rb"))
+ return rorp
+
+
+class RPathFileHook:
+ """Look like a file, but add closing hook"""
+ def __init__(self, file, closing_thunk):
+ self.file = file
+ self.closing_thunk = closing_thunk
+
+ def read(self, length = -1): return self.file.read(length)
+ def write(self, buf): return self.file.write(buf)
+
+ def close(self):
+ """Close file and then run closing thunk"""
+ result = self.file.close()
+ self.closing_thunk()
+ return result
diff --git a/rdiff-backup/rdiff_backup/static.py b/rdiff-backup/rdiff_backup/static.py
new file mode 100644
index 0000000..2e97cd0
--- /dev/null
+++ b/rdiff-backup/rdiff_backup/static.py
@@ -0,0 +1,30 @@
+execfile("globals.py")
+
+#######################################################################
+#
+# static - MakeStatic and MakeClass
+#
+# These functions are used to make all the instance methods in a class
+# into static or class methods.
+#
+
+class StaticMethodsError(Exception):
+ pass
+
+def MakeStatic(cls):
+ """Turn instance methods into static ones
+
+ The methods (that don't begin with _) of the class passed in
+ will be turned into static methods.
+
+ """
+ for name in dir(cls):
+ if name[0] != "_":
+ cls.__dict__[name] = staticmethod(cls.__dict__[name])
+
+
+def MakeClass(cls):
+ """Turn instance methods into classmethods.
Ignore _ like above""" + for name in dir(cls): + if name[0] != "_": + cls.__dict__[name] = classmethod(cls.__dict__[name]) diff --git a/rdiff-backup/src/Make b/rdiff-backup/src/Make new file mode 100755 index 0000000..cadf9ea --- /dev/null +++ b/rdiff-backup/src/Make @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +"""Read component files of rdiff-backup, and glue them together after +removing unnecessary bits.""" + +import os + +def mystrip(filename): + """Open filename, read input, strip appropriately, and return contents""" + fp = open(filename, "r") + lines = fp.readlines() + fp.close() + + i = 0 + while(lines[i][:60] != + "############################################################"): + i = i+1 + + return "".join(lines[i:]).strip() + "\n\n\n" + + + +files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py", + "iterfile.py", "rlist.py", "rdiff.py", "connection.py", + "rpath.py", "robust.py", "rorpiter.py", + "destructive_stepping.py", "increment.py", "restore.py", + "manage.py", "filelist.py", "highlevel.py", + "setconnections.py", "main.py"] + +os.system("cp header.py rdiff-backup") + +outfp = open("rdiff-backup", "a") +for file in files: + outfp.write(mystrip(file)) +outfp.close() + +os.system("chmod 755 rdiff-backup") diff --git a/rdiff-backup/src/connection.py b/rdiff-backup/src/connection.py new file mode 100644 index 0000000..83fc874 --- /dev/null +++ b/rdiff-backup/src/connection.py @@ -0,0 +1,467 @@ +execfile("rdiff.py") +import types, os, tempfile, cPickle, shutil, traceback + +####################################################################### +# +# connection - Code that deals with remote execution +# + +class ConnectionError(Exception): + pass + +class ConnectionQuit(Exception): + pass + + +class Connection: + """Connection class - represent remote execution + + The idea is that, if c is an instance of this class, c.foo will + return the object on the remote side. For functions, c.foo will + return a function that, when called, executes foo on the remote + side, sending over the arguments and sending back the result. + + """ + def __repr__(self): return self.__str__() + + +class LocalConnection(Connection): + """Local connection + + This is a dummy connection class, so that LC.foo just evaluates to + foo using global scope. 
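+
+ For example, LC.os.listdir(".") resolves "os" from the global
+ namespace via __getattr__ below and then calls the real
+ os.listdir, so code written against a remote connection also
+ runs unchanged against the local one.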
+ + """ + def __init__(self): + """This prevents two instances of LocalConnection""" + assert not Globals.local_connection + self.conn_number = 0 # changed by SetConnections for server + + def __getattr__(self, name): + try: return globals()[name] + except KeyError: + builtins = globals()["__builtins__"] + try: + if type(builtins) is types.ModuleType: + return builtins.__dict__[name] + else: return builtins[name] + except KeyError: raise NameError, name + + def __setattr__(self, name, value): + globals()[name] = value + + def __delattr__(self, name): + del globals()[name] + + def __str__(self): return "LocalConnection" + + def reval(self, function_string, *args): + return apply(eval(function_string), args) + + def quit(self): pass + +Globals.local_connection = LocalConnection() +Globals.connections.append(Globals.local_connection) +# Following changed by server in SetConnections +Globals.connection_dict[0] = Globals.local_connection + + +class ConnectionRequest: + """Simple wrapper around a PipeConnection request""" + def __init__(self, function_string, num_args): + self.function_string = function_string + self.num_args = num_args + + def __str__(self): + return "ConnectionRequest: %s with %d arguments" % \ + (self.function_string, self.num_args) + + +class LowLevelPipeConnection(Connection): + """Routines for just sending objects from one side of pipe to another + + Each thing sent down the pipe is paired with a request number, + currently limited to be between 0 and 255. The size of each thing + should be less than 2^56. + + Each thing also has a type, indicated by one of the following + characters: + + o - generic object + i - iterator/generator of RORPs + f - file object + b - string + q - quit signal + t - TempFile + R - RPath + r - RORPath only + c - PipeConnection object + + """ + def __init__(self, inpipe, outpipe): + """inpipe is a file-type open for reading, outpipe for writing""" + self.inpipe = inpipe + self.outpipe = outpipe + + def __str__(self): + """Return string version + + This is actually an important function, because otherwise + requests to represent this object would result in "__str__" + being executed on the other side of the connection. + + """ + return "LowLevelPipeConnection" + + def _put(self, obj, req_num): + """Put an object into the pipe (will send raw if string)""" + Log.conn("sending", obj, req_num) + if type(obj) is types.StringType: self._putbuf(obj, req_num) + elif isinstance(obj, Connection): self._putconn(obj, req_num) + elif isinstance(obj, TempFile): self._puttempfile(obj, req_num) + elif isinstance(obj, RPath): self._putrpath(obj, req_num) + elif isinstance(obj, RORPath): self._putrorpath(obj, req_num) + elif ((hasattr(obj, "read") or hasattr(obj, "write")) + and hasattr(obj, "close")): self._putfile(obj, req_num) + elif hasattr(obj, "next"): self._putiter(obj, req_num) + else: self._putobj(obj, req_num) + + def _putobj(self, obj, req_num): + """Send a generic python obj down the outpipe""" + self._write("o", cPickle.dumps(obj, 1), req_num) + + def _putbuf(self, buf, req_num): + """Send buffer buf down the outpipe""" + self._write("b", buf, req_num) + + def _putfile(self, fp, req_num): + """Send a file to the client using virtual files""" + self._write("f", str(VirtualFile.new(fp)), req_num) + + def _putiter(self, iterator, req_num): + """Put an iterator through the pipe""" + self._write("i", str(VirtualFile.new(RORPIter.ToFile(iterator))), + req_num) + + def _puttempfile(self, tempfile, req_num): + """Put a tempfile into pipe. 
See _putrpath"""
+ tf_repr = (tempfile.conn.conn_number, tempfile.base,
+ tempfile.index, tempfile.data)
+ self._write("t", cPickle.dumps(tf_repr, 1), req_num)
+
+ def _putrpath(self, rpath, req_num):
+ """Put an rpath into the pipe
+
+ The rpath's connection will be encoded as its conn_number. It
+ and the other information is put in a tuple.
+
+ """
+ rpath_repr = (rpath.conn.conn_number, rpath.base,
+ rpath.index, rpath.data)
+ self._write("R", cPickle.dumps(rpath_repr, 1), req_num)
+
+ def _putrorpath(self, rorpath, req_num):
+ """Put an rorpath into the pipe
+
+ This is only necessary because if there is a .file attached,
+ it must be excluded from the pickling
+
+ """
+ rorpath_repr = (rorpath.index, rorpath.data)
+ self._write("r", cPickle.dumps(rorpath_repr, 1), req_num)
+
+ def _putconn(self, pipeconn, req_num):
+ """Put a connection into the pipe
+
+ A pipe connection is represented just as the integer (in
+ string form) of its connection number it is *connected to*.
+
+ """
+ self._write("c", str(pipeconn.conn_number), req_num)
+
+ def _putquit(self):
+ """Send a string that takes down server"""
+ self._write("q", "", 255)
+
+ def _write(self, headerchar, data, req_num):
+ """Write header and then data to the pipe"""
+ self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+ self.outpipe.write(data)
+ self.outpipe.flush()
+
+ def _read(self, length):
+ """Read length bytes from inpipe, returning result"""
+ return self.inpipe.read(length)
+
+ def _s2l(self, s):
+ """Convert string to long int"""
+ assert len(s) == 7
+ l = 0L
+ for i in range(7): l = l*256 + ord(s[i])
+ return l
+
+ def _l2s(self, l):
+ """Convert long int to string"""
+ s = ""
+ for i in range(7):
+ l, remainder = divmod(l, 256)
+ s = chr(remainder) + s
+ assert l == 0
+ return s
+
+ def _get(self):
+ """Read an object from the pipe and return (req_num, value)"""
+ header_string = self.inpipe.read(9)
+ assert len(header_string) == 9, \
+ "Error reading from pipe (problem probably originated remotely)"
+ try:
+ format_string, req_num, length = (header_string[0],
+ ord(header_string[1]),
+ self._s2l(header_string[2:]))
+ except IndexError: raise ConnectionError()
+ if format_string == "o": result = cPickle.loads(self._read(length))
+ elif format_string == "b": result = self._read(length)
+ elif format_string == "f":
+ result = VirtualFile(self, int(self._read(length)))
+ elif format_string == "i":
+ result = RORPIter.FromFile(BufferedRead(
+ VirtualFile(self, int(self._read(length)))))
+ elif format_string == "t":
+ result = self._gettempfile(self._read(length))
+ elif format_string == "r":
+ result = self._getrorpath(self._read(length))
+ elif format_string == "R": result = self._getrpath(self._read(length))
+ elif format_string == "c":
+ result = Globals.connection_dict[int(self._read(length))]
+ else:
+ assert format_string == "q", header_string
+ raise ConnectionQuit("Received quit signal")
+ Log.conn("received", result, req_num)
+ return (req_num, result)
+
+ def _getrorpath(self, raw_rorpath_buf):
+ """Reconstruct RORPath object from raw data"""
+ index, data = cPickle.loads(raw_rorpath_buf)
+ return RORPath(index, data)
+
+ def _gettempfile(self, raw_tf_buf):
+ """Return TempFile object indicated by raw_tf_buf"""
+ conn_number, base, index, data = cPickle.loads(raw_tf_buf)
+ return TempFile(Globals.connection_dict[conn_number],
+ base, index, data)
+
+ def _getrpath(self, raw_rpath_buf):
+ """Return RPath object indicated by raw_rpath_buf"""
+ conn_number, base, index, data =
cPickle.loads(raw_rpath_buf) + return RPath(Globals.connection_dict[conn_number], base, index, data) + + def _close(self): + """Close the pipes associated with the connection""" + self.outpipe.close() + self.inpipe.close() + + +class PipeConnection(LowLevelPipeConnection): + """Provide server and client functions for a Pipe Connection + + Both sides act as modules that allows for remote execution. For + instance, self.conn.pow(2,8) will execute the operation on the + server side. + + The only difference between the client and server is that the + client makes the first request, and the server listens first. + + """ + def __init__(self, inpipe, outpipe, conn_number = 0): + """Init PipeConnection + + conn_number should be a unique (to the session) integer to + identify the connection. For instance, all connections to the + client have conn_number 0. Other connections can use this + number to route commands to the correct process. + + """ + LowLevelPipeConnection.__init__(self, inpipe, outpipe) + self.conn_number = conn_number + self.unused_request_numbers = {} + for i in range(256): self.unused_request_numbers[i] = None + + def __str__(self): return "PipeConnection %d" % self.conn_number + + def get_response(self, desired_req_num): + """Read from pipe, responding to requests until req_num. + + Sometimes after a request is sent, the other side will make + another request before responding to the original one. In + that case, respond to the request. But return once the right + response is given. + + """ + while 1: + try: req_num, object = self._get() + except ConnectionQuit: + self._put("quitting", self.get_new_req_num()) + return + if req_num == desired_req_num: return object + else: + assert isinstance(object, ConnectionRequest) + self.answer_request(object, req_num) + + def answer_request(self, request, req_num): + """Put the object requested by request down the pipe""" + del self.unused_request_numbers[req_num] + argument_list = [] + for i in range(request.num_args): + arg_req_num, arg = self._get() + assert arg_req_num == req_num + argument_list.append(arg) + try: result = apply(eval(request.function_string), argument_list) + except: result = self.extract_exception() + self._put(result, req_num) + self.unused_request_numbers[req_num] = None + + def extract_exception(self): + """Return active exception""" + Log("Sending back exception: \n" + + "".join(traceback.format_tb(sys.exc_info()[2])), 2) + return sys.exc_info()[1] + + def Server(self): + """Start server's read eval return loop""" + Globals.server = 1 + Globals.connections.append(self) + Log("Starting server", 6) + self.get_response(-1) + + def reval(self, function_string, *args): + """Execute command on remote side + + The first argument should be a string that evaluates to a + function, like "pow", and the remaining are arguments to that + function. 
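+
+ For example, conn.reval("pow", 2, 8) asks the other side to
+ evaluate pow(2, 8) and returns 256. If the remote call raises
+ an exception, the exception object is sent back and re-raised
+ locally (see the isinstance check below).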
+
+ """
+ req_num = self.get_new_req_num()
+ self._put(ConnectionRequest(function_string, len(args)), req_num)
+ for arg in args: self._put(arg, req_num)
+ result = self.get_response(req_num)
+ self.unused_request_numbers[req_num] = None
+ if isinstance(result, Exception): raise result
+ else: return result
+
+ def get_new_req_num(self):
+ """Allot a new request number and return it"""
+ if not self.unused_request_numbers:
+ raise ConnectionError("Exhausted possible request numbers")
+ req_num = self.unused_request_numbers.keys()[0]
+ del self.unused_request_numbers[req_num]
+ return req_num
+
+ def quit(self):
+ """Close the associated pipes and tell server side to quit"""
+ assert not Globals.server
+ self._putquit()
+ self._get()
+ self._close()
+
+ def __getattr__(self, name):
+ """Intercept attributes to allow for . invocation"""
+ return EmulateCallable(self, name)
+
+
+class RedirectedConnection(Connection):
+ """Represent a connection more than one move away
+
+ For instance, suppose things are connected like this: S1---C---S2.
+ If Server1 wants something done by Server2, it will have to go
+ through the Client. So on S1's side, S2 will be represented by a
+ RedirectedConnection.
+
+ """
+ def __init__(self, conn_number, routing_number = 0):
+ """RedirectedConnection initializer
+
+ Returns a RedirectedConnection object for the given
+ conn_number, where commands are routed through the connection
+ with the given routing_number. 0 is the client, so the
+ default shouldn't have to be changed.
+
+ """
+ self.conn_number = conn_number
+ self.routing_number = routing_number
+ self.routing_conn = Globals.connection_dict[routing_number]
+
+ def __str__(self):
+ return "RedirectedConnection %d,%d" % (self.conn_number,
+ self.routing_number)
+
+ def __getattr__(self, name):
+ return EmulateCallable(self.routing_conn,
+ "Globals.get_dict_val('connection_dict', %d).%s"
+ % (self.conn_number, name))
+
+
+class EmulateCallable:
+ """This is used by PipeConnection in calls like conn.os.chmod(foo)"""
+ def __init__(self, connection, name):
+ self.connection = connection
+ self.name = name
+ def __call__(self, *args):
+ return apply(self.connection.reval, (self.name,) + args)
+ def __getattr__(self, attr_name):
+ return EmulateCallable(self.connection,
+ "%s.%s" % (self.name, attr_name))
+
+
+class VirtualFile:
+ """When the client asks for a file over the connection, it gets this
+
+ The returned instance then forwards requests over the connection.
+ The class's dictionary is used by the server to associate each
+ such file with a unique file number.
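+
+ A sketch of the round trip: the server calls VirtualFile.new(fp)
+ and ships the returned integer id down the pipe; the client
+ wraps that id as VirtualFile(conn, id), and each read, write,
+ or close becomes a remote VirtualFile.readfromid, writetoid, or
+ closebyid call.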
+ + """ + #### The following are used by the server + vfiles = {} + counter = 0 + + def getbyid(cls, id): + return cls.vfiles[id] + getbyid = classmethod(getbyid) + + def readfromid(cls, id, length): + return cls.vfiles[id].read(length) + readfromid = classmethod(readfromid) + + def writetoid(cls, id, buffer): + return cls.vfiles[id].write(buffer) + writetoid = classmethod(writetoid) + + def closebyid(cls, id): + fp = cls.vfiles[id] + del cls.vfiles[id] + return fp.close() + closebyid = classmethod(closebyid) + + def new(cls, fileobj): + """Associate a new VirtualFile with a read fileobject, return id""" + count = cls.counter + cls.vfiles[count] = fileobj + cls.counter = count + 1 + return count + new = classmethod(new) + + + #### And these are used by the client + def __init__(self, connection, id): + self.connection = connection + self.id = id + + def read(self, length = -1): + return self.connection.VirtualFile.readfromid(self.id, length) + + def write(self, buf): + return self.connection.VirtualFile.writetoid(self.id, buf) + + def close(self): + return self.connection.VirtualFile.closebyid(self.id) diff --git a/rdiff-backup/src/destructive_stepping.py b/rdiff-backup/src/destructive_stepping.py new file mode 100644 index 0000000..80d274e --- /dev/null +++ b/rdiff-backup/src/destructive_stepping.py @@ -0,0 +1,250 @@ +from __future__ import generators +execfile("rorpiter.py") + +####################################################################### +# +# destructive-stepping - Deal with side effects from traversing trees +# + +class DSRPath(RPath): + """Destructive Stepping RPath + + Sometimes when we traverse the directory tree, even when we just + want to read files, we have to change things, like the permissions + of a file or directory in order to read it, or the file's access + times. This class is like an RPath, but the permission and time + modifications are delayed, so that they can be done at the very + end when they won't be disturbed later. + + """ + def __init__(self, *args): + self.perms_delayed = self.times_delayed = None + RPath.__init__(self, *args) + + def __getstate__(self): + """Return picklable state. 
See RPath __getstate__.""" + assert self.conn is Globals.local_connection # Can't pickle a conn + pickle_dict = {} + for attrib in ['index', 'data', 'perms_delayed', 'times_delayed', + 'newperms', 'newtimes', 'path', 'base']: + if self.__dict__.has_key(attrib): + pickle_dict[attrib] = self.__dict__[attrib] + return pickle_dict + + def __setstate__(self, pickle_dict): + """Set state from object produced by getstate""" + self.conn = Globals.local_connection + for attrib in pickle_dict.keys(): + self.__dict__[attrib] = pickle_dict[attrib] + + def delay_perm_writes(self): + """Signal that permission writing should be delayed until the end""" + self.perms_delayed = 1 + self.newperms = None + + def delay_time_changes(self): + """Signal that time changes should also be delayed until the end""" + self.times_delayed = 1 + self.newtimes = None + + def chmod(self, permissions): + """Change permissions, delaying if self.perms_delayed is set""" + if self.perms_delayed: + self.newperms = 1 + self.data['perms'] = permissions + else: RPath.chmod(self, permissions) + + def chmod_bypass(self, permissions): + """Change permissions without updating the data dictionary""" + self.conn.os.chmod(self.path, permissions) + self.perms_delayed = self.newperms = 1 + + def remember_times(self): + """Mark times as changed so they can be restored later""" + self.times_delayed = self.newtimes = 1 + + def settime(self, accesstime, modtime): + """Change times, delaying if self.times_delayed is set""" + if self.times_delayed: + self.newtimes = 1 + self.data['atime'] = accesstime + self.data['mtime'] = modtime + else: RPath.settime(self, accesstime, modtime) + + def settime_bypass(self, accesstime, modtime): + """Change times without updating data dictionary""" + self.conn.os.utime(self.path, (accesstime, modtime)) + + def setmtime(self, modtime): + """Change mtime, delaying if self.times_delayed is set""" + if self.times_delayed: + self.newtimes = 1 + self.data['mtime'] = modtime + else: RPath.setmtime(self, modtime) + + def setmtime_bypass(self, modtime): + """Change mtime without updating data dictionary""" + self.conn.os.utime(self.path, (time.time(), modtime)) + + def restoretimes(self): + """Write times in self.data back to file""" + RPath.settime(self, self.data['atime'], self.data['mtime']) + + def restoreperms(self): + """Write permissions in self.data back to file""" + RPath.chmod(self, self.data['perms']) + + def write_changes(self): + """Write saved up permission/time changes""" + if not self.lstat(): return # File has been deleted in meantime + + if self.perms_delayed and self.newperms: + self.conn.os.chmod(self.path, self.getperms()) + if self.times_delayed: + if self.data.has_key('atime'): + self.settime_bypass(self.getatime(), self.getmtime()) + elif self.newtimes and self.data.has_key('mtime'): + self.setmtime_bypass(self.getmtime()) + + +class DestructiveStepping: + """Destructive stepping""" + def initialize(dsrpath, source): + """Change permissions of dsrpath, possibly delay writes + + Abort if we need to access something and can't. If the file + is on the source partition, just log warning and return true. + Return false if everything good to go. + + """ + if not source or Globals.change_source_perms: + dsrpath.delay_perm_writes() + + def warn(err): + Log("Received error '%s' when dealing with file %s, skipping..." + % (err, dsrpath.path), 1) + + def abort(): + Log.FatalError("Missing access to file %s - aborting." % + dsrpath.path) + + def try_chmod(perms): + """Try to change the perms. 
If fail, return error.""" + try: dsrpath.chmod_bypass(perms) + except os.error, err: return err + return None + + if dsrpath.isreg() and not dsrpath.readable(): + if source: + if Globals.change_source_perms and dsrpath.isowner(): + err = try_chmod(0400) + if err: + warn(err) + return 1 + else: + warn("No read permissions") + return 1 + elif not Globals.change_mirror_perms or try_chmod(0600): abort() + elif dsrpath.isdir(): + if source and (not dsrpath.readable() or not dsrpath.executable()): + if Globals.change_source_perms and dsrpath.isowner(): + err = try_chmod(0500) + if err: + warn(err) + return 1 + else: + warn("No read or exec permissions") + return 1 + elif not source and not dsrpath.hasfullperms(): + if Globals.change_mirror_perms: try_chmod(0700) + + # Permissions above; now try to preserve access times if necessary + if (source and (Globals.preserve_atime or + Globals.change_source_perms) or + not source): + # These are the circumstances under which we will have to + # touch up a file's times after we are done with it + dsrpath.remember_times() + return None + + def Finalizer(initial_state = None): + """Return a finalizer that can work on an iterator of dsrpaths + + The reason we have to use an IterTreeReducer is that some files + should be updated immediately, but for directories we sometimes + need to update all the files in the directory before finally + coming back to it. + + """ + return IterTreeReducer(lambda x: None, lambda x,y: None, None, + lambda dsrpath, x, y: dsrpath.write_changes(), + initial_state) + + def isexcluded(dsrp, source): + """Return true if given DSRPath is excluded/ignored + + If source = 1, treat as source file, otherwise treat as + destination file. + + """ + if Globals.exclude_device_files and dsrp.isdev(): return 1 + + if source: exclude_regexps = Globals.exclude_regexps + else: exclude_regexps = Globals.exclude_mirror_regexps + + for regexp in exclude_regexps: + if regexp.match(dsrp.path): + Log("Excluding %s" % dsrp.path, 6) + return 1 + return None + + def Iterate_from(baserp, source, starting_index = None): + """Iterate dsrps from baserp, skipping any matching exclude_regexps + + includes only dsrps with indicies greater than starting_index + if starting_index is not None. 
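+
+ For example (hypothetical index), with starting_index
+ ("usr", "local") the iterator steps through ("usr",) and
+ ("usr", "local") without yielding them, and then yields every
+ dsrp whose index sorts after ("usr", "local").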
+
+ """
+ def helper_starting_from(dsrpath):
+ """Like helper, but only start iterating after starting_index"""
+ if dsrpath.index > starting_index:
+ # Past starting_index, revert to normal helper
+ for dsrp in helper(dsrpath): yield dsrp
+ elif dsrpath.index == starting_index[:len(dsrpath.index)]:
+ # May encounter starting index on this branch
+ if (not DestructiveStepping.isexcluded(dsrpath, source) and
+ not DestructiveStepping.initialize(dsrpath, source)):
+ if dsrpath.isdir():
+ dir_listing = dsrpath.listdir()
+ dir_listing.sort()
+ for filename in dir_listing:
+ for dsrp in helper_starting_from(
+ dsrpath.append(filename)):
+ yield dsrp
+
+ def helper(dsrpath):
+ if (not DestructiveStepping.isexcluded(dsrpath, source) and
+ not DestructiveStepping.initialize(dsrpath, source)):
+ yield dsrpath
+ if dsrpath.isdir():
+ dir_listing = dsrpath.listdir()
+ dir_listing.sort()
+ for filename in dir_listing:
+ for dsrp in helper(dsrpath.append(filename)):
+ yield dsrp
+
+ base_dsrpath = DSRPath(baserp.conn, baserp.base,
+ baserp.index, baserp.data)
+ if starting_index is None: return helper(base_dsrpath)
+ else: return helper_starting_from(base_dsrpath)
+
+ def Iterate_with_Finalizer(baserp, source):
+ """Like Iterate_from, but finalize each dsrp afterwards"""
+ finalize = DestructiveStepping.Finalizer()
+ for dsrp in DestructiveStepping.Iterate_from(baserp, source):
+ yield dsrp
+ finalize(dsrp)
+ finalize.getresult()
+
+
+MakeStatic(DestructiveStepping)
diff --git a/rdiff-backup/src/filelist.py b/rdiff-backup/src/filelist.py
new file mode 100644
index 0000000..7a660c3
--- /dev/null
+++ b/rdiff-backup/src/filelist.py
@@ -0,0 +1,106 @@
+from __future__ import generators
+execfile("manage.py")
+
+#######################################################################
+#
+# filelist - Some routines that help with operations over files listed
+# in standard input instead of over whole directories.
+#
+
+class FilelistError(Exception): pass
+
+class Filelist:
+ """Many of these methods have analogs in highlevel.py"""
+ def File2Iter(fp, baserp):
+ """Convert file obj with one pathname per line into rpiter
+
+ Closes fp when done. Given files are added to baserp.
+
+ """
+ while 1:
+ line = fp.readline()
+ if not line: break
+ if line[-1] == "\n": line = line[:-1] # strip trailing newline
+ if not line: continue # skip blank lines
+ elif line[0] == "/": raise FilelistError(
+ "Read in absolute file name %s." % line)
+ yield baserp.append(line)
+ assert not fp.close(), "Error closing filelist fp"
+
+ def Mirror(src_rpath, dest_rpath, rpiter):
+ """Copy files in fileiter from src_rpath to dest_rpath"""
+ sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
+ diffiter = Filelist.get_diffs(src_rpath, sigiter)
+ dest_rpath.conn.Filelist.patch(dest_rpath, diffiter)
+ dest_rpath.setdata()
+
+ def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath, rpiter):
+ """Mirror + put increment in tree based at inc_rpath"""
+ sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
+ diffiter = Filelist.get_diffs(src_rpath, sigiter)
+ dest_rpath.conn.Filelist.patch_and_increment(dest_rpath, diffiter,
+ inc_rpath)
+ dest_rpath.setdata()
+
+ def get_sigs(dest_rpbase, rpiter):
+ """Get signatures of file analogs in rpiter
+
+ This is meant to be run on the destination side. Only the
+ extension part of the rps in rpiter will be used; the base is
+ ignored.
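+
+ For instance (hypothetical paths), with dest_rpbase pointing at
+ /backup, an incoming rp with index ("foo", "bar") produces a
+ signature for /backup/foo/bar, whatever base the rp itself
+ carries.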
+ + """ + def dest_iter(src_iter): + for src_rp in src_iter: yield dest_rpbase.new_index(src_rp.index) + return RORPIter.Signatures(dest_iter()) + + def get_diffs(src_rpbase, sigiter): + """Get diffs based on sigiter and files in src_rpbase + + This should be run on the local side. + + """ + for sig_rorp in sigiter: + new_rp = src_rpbase.new_index(sig_rorp.index) + yield RORPIter.diffonce(sig_rorp, new_rp) + + def patch(dest_rpbase, diffiter): + """Process diffs in diffiter and update files in dest_rbpase. + + Run remotely. + + """ + for diff_rorp in diffiter: + basisrp = dest_rpbase.new_index(diff_rorp.index) + if basisrp.lstat(): Filelist.make_subdirs(basisrp) + Log("Processing %s" % basisrp.path, 7) + RORPIter.patchonce(dest_rpbase, basisrp, diff_rorp) + + def patch_and_increment(dest_rpbase, diffiter, inc_rpbase): + """Apply diffs in diffiter to dest_rpbase, and increment to inc_rpbase + + Also to be run remotely. + + """ + for diff_rorp in diffiter: + basisrp = dest_rpbase.new_index(diff_rorp.index) + if diff_rorp.lstat(): Filelist.make_subdirs(basisrp) + Log("Processing %s" % basisrp.path, 7) + # XXX This isn't done yet... + + def make_subdirs(rpath): + """Make sure that all the directories under the rpath exist + + This function doesn't try to get the permissions right on the + underlying directories, just do the minimum to make sure the + file can be created. + + """ + dirname = rpath.dirsplit()[0] + if dirname == '.' or dirname == '': return + dir_rp = RPath(rpath.conn, dirname) + Filelist.make_subdirs(dir_rp) + if not dir_rp.lstat(): dir_rp.mkdir() + + +MakeStatic(Filelist) diff --git a/rdiff-backup/src/globals.py b/rdiff-backup/src/globals.py new file mode 100644 index 0000000..d9cd64a --- /dev/null +++ b/rdiff-backup/src/globals.py @@ -0,0 +1,172 @@ +import re, os + +####################################################################### +# +# globals - aggregate some configuration options +# + +class Globals: + + # The current version of rdiff-backup + version = "0.6.0" + + # This determines how many bytes to read at a time when copying + blocksize = 32768 + + # This is used by the BufferedRead class to determine how many + # bytes to request from the underlying file per read(). Larger + # values may save on connection overhead and latency. + conn_bufsize = 4096 + + # True if script is running as a server + server = None + + # uid and gid of the owner of the rdiff-backup process. This can + # vary depending on the connection. + process_uid = os.getuid() + process_gid = os.getgid() + + # If true, when copying attributes, also change target's uid/gid + change_ownership = None + + # If true, change the permissions of unwriteable mirror files + # (such as directories) so that they can be written, and then + # change them back. + change_mirror_perms = 1 + + # If true, temporarily change permissions of unreadable files in + # the source directory to make sure we can read all files. + change_source_perms = None + + # If true, try to reset the atimes of the source partition. + preserve_atime = None + + # This is a list of compiled regular expressions. If one of them + # matches a file in the source area, do not process that file. + exclude_regexps = [] + + # Another list of compiled regexps; this time the file is excluded + # if it matches something in the destination area. + exclude_mirror_regexps = [] + + # If this is true, rdiff-backup will exclude any dev files it + # sees, in the same way it excludes files matching the exclude + # regexps. 
+ exclude_device_files = None + + # This will be set as soon as the LocalConnection class loads + local_connection = None + + # If this is true, instead of processing whole directory, just + # examine files read in from standard input. + include_from_stdin = None + + # All connections should be added to the following list, so + # further global changes can be propagated to the remote systems. + # The first element should be Globals.local_connection. For a + # server, the second is the connection to the client. + connections = [] + + # Each process should have a connection number unique to the + # session. The client has connection number 0. + connection_number = 0 + + # Dictionary pairing connection numbers with connections. Set in + # SetConnections for all connections. + connection_dict = {} + + # True if the script is the end that reads the source directory + # for backups. It is true for purely local sessions. + isbackup_reader = None + + # Connection of the real backup reader (for which isbackup_reader + # is true) + backup_reader = None + + # True if the script is the end that writes to the increment and + # mirror directories. True for purely local sessions. + isbackup_writer = None + + # Connection of the backup writer + backup_writer = None + + # This list is used by the set function below. When a new + # connection is created with init_connection, its Globals class + # will match this one for all the variables mentioned in this + # list. + changed_settings = [] + + # rdiff-backup will try to checkpoint its state every + # checkpoint_interval seconds. Then when resuming, at most this + # amount of time is lost. + checkpoint_interval = 20 + + # The RPath of the rdiff-backup-data directory. + rbdir = None + + # Indicates if a resume or a lack of resume is forced. This + # should be None for the default. 0 means don't resume, and 1 + # means resume. + resume = None + + # If there has been an aborted backup fewer than this many seconds + # ago, attempt to resume it where it left off instead of starting + # a new one. + resume_window = 7200 + + # This string is used when recognizing and creating time strings. + # If the time_separator is ":", then W3 datetime strings like + # 2001-12-07T04:22:01-07:00 are produced. It can be set to "_" to + # make filenames that don't contain colons, which aren't allowed + # under MS windows NT. + time_separator = ":" + + def get(cls, name): + """Return the value of something in this class""" + return cls.__dict__[name] + get = classmethod(get) + + def set(cls, name, val): + """Set the value of something in this class + + Use this instead of writing the values directly if the setting + matters to remote sides. This function updates the + changed_settings list, so other connections know to copy the + changes. + + """ + cls.changed_settings.append(name) + cls.__dict__[name] = val + set = classmethod(set) + + def set_integer(cls, name, val): + """Like set, but make sure val is an integer""" + try: intval = int(val) + except ValueError: + Log.FatalError("Variable %s must be set to an integer -\n" + "received %s instead." 
% (name, val)) + cls.set(name, intval) + set_integer = classmethod(set_integer) + + def get_dict_val(cls, name, key): + """Return val from dictionary in this class""" + return cls.__dict__[name][key] + get_dict_val = classmethod(get_dict_val) + + def set_dict_val(cls, name, key, val): + """Set value for dictionary in this class""" + cls.__dict__[name][key] = val + set_dict_val = classmethod(set_dict_val) + + def add_regexp(cls, regstr, mirror=None): + """Add a regular expression to the exclude list""" + for conn in Globals.connections: + conn.Globals.add_regexp_local(regstr, mirror) + add_regexp = classmethod(add_regexp) + + def add_regexp_local(cls, regstr, mirror): + """Add the regex only to the local Globals class""" + compiled = re.compile(regstr) + if mirror: Globals.exclude_mirror_regexps.append(compiled) + else: Globals.exclude_regexps.append(compiled) + add_regexp_local = classmethod(add_regexp_local) diff --git a/rdiff-backup/src/header.py b/rdiff-backup/src/header.py new file mode 100644 index 0000000..31b3ff0 --- /dev/null +++ b/rdiff-backup/src/header.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# +# rdiff-backup -- Mirror files while keeping incremental changes +# Version 0.6.0 released March 14, 2002 +# Copyright (C) 2001 Ben Escoto +# +# This program is licensed under the GNU General Public License (GPL). +# Distributions of rdiff-backup usually include a copy of the GPL in a +# file called COPYING. The GPL is also available online at +# http://www.gnu.org/copyleft/gpl.html. +# +# Please send mail to me or the mailing list if you find bugs or have +# any suggestions. + +from __future__ import nested_scopes, generators +import os, stat, time, sys, getopt, re, cPickle, types, shutil, sha, marshal, traceback, popen2, tempfile + + diff --git a/rdiff-backup/src/highlevel.py b/rdiff-backup/src/highlevel.py new file mode 100644 index 0000000..55fe007 --- /dev/null +++ b/rdiff-backup/src/highlevel.py @@ -0,0 +1,288 @@ +from __future__ import generators +execfile("filelist.py") + +####################################################################### +# +# highlevel - High level functions for mirroring, mirror & inc, etc. +# + +class SkipFileException(Exception): + """Signal that the current file should be skipped but then continue + + This exception will often be raised when there is problem reading + an individual file, but it makes sense for the rest of the backup + to keep going. + + """ + pass + + +class HighLevel: + """High level static functions + + The design of some of these functions is represented on the + accompanying diagram. + + """ + def Mirror(src_rpath, dest_rpath, checkpoint = 1, session_info = None): + """Turn dest_rpath into a copy of src_rpath + + Checkpoint true means to checkpoint periodically, otherwise + not. If session_info is given, try to resume Mirroring from + that point. 
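+
+ The data flow, as wired up below: the source iterates its
+ dsrps, the destination sends back signatures of dissimilar
+ files, the source turns those into diffs, and the destination
+ patches itself, with each stage running on the connection that
+ owns the files.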
+ + """ + SourceS = src_rpath.conn.HLSourceStruct + DestS = dest_rpath.conn.HLDestinationStruct + + SourceS.set_session_info(session_info) + DestS.set_session_info(session_info) + src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) + diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) + DestS.patch_and_finalize(dest_rpath, diffiter, checkpoint) + + dest_rpath.setdata() + + def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath, + session_info = None): + """Mirror + put increments in tree based at inc_rpath""" + SourceS = src_rpath.conn.HLSourceStruct + DestS = dest_rpath.conn.HLDestinationStruct + + SourceS.set_session_info(session_info) + DestS.set_session_info(session_info) + if not session_info: dest_rpath.conn.SaveState.touch_last_file() + src_init_dsiter = SourceS.split_initial_dsiter(src_rpath) + dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter) + diffiter = SourceS.get_diffs_and_finalize(dest_sigiter) + DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath) + + dest_rpath.setdata() + inc_rpath.setdata() + + def Restore(rest_time, mirror_base, baseinc_tup, target_base): + """Like Restore.RestoreRecursive but check arguments""" + if not isinstance(target_base, DSRPath): + target_base = DSRPath(target_base.conn, target_base.base, + target_base.index, target_base.data) + Restore.RestoreRecursive(rest_time, mirror_base, + baseinc_tup, target_base) + +MakeStatic(HighLevel) + + +class HLSourceStruct: + """Hold info used by HL on the source side""" + _session_info = None # set to si if resuming + def set_session_info(cls, session_info): + cls._session_info = session_info + + def iterate_from(cls, rpath): + """Supply more aruments to DestructiveStepping.Iterate_from""" + if cls._session_info: + return DestructiveStepping.Iterate_from(rpath, 1, + cls._session_info.last_index) + else: return DestructiveStepping.Iterate_from(rpath, 1) + + def split_initial_dsiter(cls, rpath): + """Set iterators of all dsrps from rpath, returning one""" + dsiter = cls.iterate_from(rpath) + initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) + return initial_dsiter1 + + def get_diffs_and_finalize(cls, sigiter): + """Return diffs and finalize any dsrp changes remaining + + Return a rorpiterator with files included of signatures of + dissimilar files. This is the last operation run on the local + filestream, so finalize dsrp writes. 
+ + """ + collated = RORPIter.CollateIterators(cls.initial_dsiter2, sigiter) + finalizer = DestructiveStepping.Finalizer() + def diffs(): + for dsrp, dest_sig in collated: + try: + if dest_sig: + if dest_sig.isplaceholder(): yield dest_sig + else: yield RORPIter.diffonce(dest_sig, dsrp) + if dsrp: finalizer(dsrp) + except (IOError, OSError, RdiffException): + Log.exception() + Log("Error processing %s, skipping" % + str(dest_sig.index), 2) + finalizer.getresult() + return diffs() + +MakeClass(HLSourceStruct) + + +class HLDestinationStruct: + """Hold info used by HL on the destination side""" + _session_info = None # set to si if resuming + def set_session_info(cls, session_info): + cls._session_info = session_info + + def iterate_from(cls, rpath): + """Supply more arguments to DestructiveStepping.Iterate_from""" + if cls._session_info: + return DestructiveStepping.Iterate_from(rpath, None, + cls._session_info.last_index) + else: return DestructiveStepping.Iterate_from(rpath, None) + + def split_initial_dsiter(cls, rpath): + """Set initial_dsiters (iteration of all dsrps from rpath)""" + dsiter = cls.iterate_from(rpath) + result, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2) + return result + + def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter): + """Get dissimilars + + Returns an iterator which enumerates the dsrps which are + different on the source and destination ends. The dsrps do + not necessarily exist on the destination end. + + Also, to prevent the system from getting backed up on the + remote end, if we don't get enough dissimilars, stick in a + placeholder every so often, like fiber. The more + placeholders, the more bandwidth used, but if there aren't + enough, lots of memory will be used because files will be + accumulating on the source side. How much will accumulate + will depend on the Globals.conn_bufsize value. + + """ + collated = RORPIter.CollateIterators(src_init_iter, dest_init_iter) + def generate_dissimilar(): + counter = 0 + for src_rorp, dest_dsrp in collated: + if not dest_dsrp: + dsrp = DSRPath(baserp.conn, baserp.base, src_rorp.index) + if dsrp.lstat(): + Log("Warning: Found unexpected destination file %s." 
+ % dsrp.path, 2) + if DestructiveStepping.isexcluded(dsrp, None): continue + counter = 0 + yield dsrp + elif not src_rorp or not src_rorp == dest_dsrp: + counter = 0 + yield dest_dsrp + else: # source and destinition both exist and are same + if counter == 20: + placeholder = RORPath(src_rorp.index) + placeholder.make_placeholder() + counter = 0 + yield placeholder + else: counter += 1 + return generate_dissimilar() + + def get_sigs(cls, baserp, src_init_iter): + """Return signatures of all dissimilar files""" + dest_iters1 = cls.split_initial_dsiter(baserp) + dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1) + return RORPIter.Signatures(dissimilars) + + def get_dsrp(cls, dest_rpath, index): + """Return initialized dsrp based on dest_rpath with given index""" + dsrp = DSRPath(dest_rpath.conn, dest_rpath.base, index) + DestructiveStepping.initialize(dsrp, None) + return dsrp + + def get_finalizer(cls): + """Return finalizer, starting from session info if necessary""" + init_state = cls._session_info and cls._session_info.finalizer_state + return DestructiveStepping.Finalizer(init_state) + + def get_ITR(cls, inc_rpath): + """Return ITR, starting from state if necessary""" + init_state = cls._session_info and cls._session_info.ITR_state + return Inc.make_patch_increment_ITR(inc_rpath, init_state) + + def patch_and_finalize(cls, dest_rpath, diffs, checkpoint = 1): + """Apply diffs and finalize""" + collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2) + finalizer = cls.get_finalizer() + dsrp = None + + def error_checked(): + """Inner writing loop, check this for errors""" + indexed_tuple = collated.next() + Log("Processing %s" % str(indexed_tuple), 7) + diff_rorp, dsrp = indexed_tuple + if not dsrp: + dsrp = cls.get_dsrp(dest_rpath, diff_rorp.index) + DestructiveStepping.initialize(dsrp, None) + if diff_rorp and not diff_rorp.isplaceholder(): + RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() + finalizer(dsrp) + return dsrp + + try: + while 1: + try: dsrp = cls.check_skip_error(error_checked) + except StopIteration: break + if checkpoint: SaveState.checkpoint_mirror(finalizer, dsrp) + except: cls.handle_last_error(dsrp, finalizer) + finalizer.getresult() + if checkpoint: SaveState.checkpoint_remove() + + def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath): + """Apply diffs, write increment if necessary, and finalize""" + collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2) + finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath) + dsrp = None + + def error_checked(): + """Inner writing loop, catch variety of errors from this""" + indexed_tuple = collated.next() + Log("Processing %s" % str(indexed_tuple), 7) + diff_rorp, dsrp = indexed_tuple + if not dsrp: + dsrp = cls.get_dsrp(dest_rpath, indexed_tuple.index) + DestructiveStepping.initialize(dsrp, None) + indexed_tuple = IndexedTuple(indexed_tuple.index, + (diff_rorp, dsrp)) + if diff_rorp and diff_rorp.isplaceholder(): + indexed_tuple = IndexedTuple(indexed_tuple.index, + (None, dsrp)) + ITR(indexed_tuple) + finalizer(dsrp) + return dsrp + + try: + while 1: + try: dsrp = cls.check_skip_error(error_checked) + except StopIteration: break + SaveState.checkpoint_inc_backup(ITR, finalizer, dsrp) + except: cls.handle_last_error(dsrp, finalizer, ITR) + ITR.getresult() + finalizer.getresult() + SaveState.checkpoint_remove() + + def check_skip_error(cls, thunk): + """Run thunk, catch certain errors skip files""" + try: return thunk() + except (IOError, OSError, 
SkipFileException), exp: + Log.exception() + if (not isinstance(exp, IOError) or + (isinstance(exp, IOError) and + (exp[0] in [2, # Means that a file is missing + 5, # Reported by docv (see list) + 13, # Permission denied IOError + 26] # Requested by Campbell (see list) - + # happens on some NT systems + ))): + Log("Skipping file", 2) + return None + else: raise + + def handle_last_error(cls, dsrp, finalizer, ITR = None): + """If catch fatal error, try to checkpoint before exiting""" + Log.exception(1) + if ITR: SaveState.checkpoint_inc_backup(ITR, finalizer, dsrp, 1) + else: SaveState.checkpoint_mirror(finalizer, dsrp, 1) + SaveState.touch_last_file_definitive() + raise + +MakeClass(HLDestinationStruct) diff --git a/rdiff-backup/src/increment.py b/rdiff-backup/src/increment.py new file mode 100644 index 0000000..4ed6a39 --- /dev/null +++ b/rdiff-backup/src/increment.py @@ -0,0 +1,180 @@ +execfile("destructive_stepping.py") + +####################################################################### +# +# increment - Provides Inc class, which writes increment files +# +# This code is what writes files ending in .diff, .snapshot, etc. +# + +class Inc: + """Class containing increment functions""" + def Increment_action(new, mirror, incpref): + """Main file incrementing function, returns RobustAction + + new is the file on the active partition, + mirror is the mirrored file from the last backup, + incpref is the prefix of the increment file. + + This function basically moves mirror -> incpref. + + """ + if not (new and new.lstat() or mirror.lstat()): + return Robust.null_action # Files deleted in meantime, do nothing + + Log("Incrementing mirror file " + mirror.path, 5) + if ((new and new.isdir()) or mirror.isdir()) and not incpref.isdir(): + incpref.mkdir() + + if not mirror.lstat(): return Inc.makemissing_action(incpref) + elif mirror.isdir(): return Inc.makedir_action(mirror, incpref) + elif new.isreg() and mirror.isreg(): + return Inc.makediff_action(new, mirror, incpref) + else: return Inc.makesnapshot_action(mirror, incpref) + + def Increment(new, mirror, incpref): + Inc.Increment_action(new, mirror, incpref).execute() + + def makemissing_action(incpref): + """Signify that mirror file was missing""" + return RobustAction(lambda: None, + Inc.get_inc_ext(incpref, "missing").touch, + lambda exp: None) + + def makesnapshot_action(mirror, incpref): + """Copy mirror to incfile, since new is quite different""" + snapshotrp = Inc.get_inc_ext(incpref, "snapshot") + return Robust.copy_with_attribs_action(mirror, snapshotrp) + + def makediff_action(new, mirror, incpref): + """Make incfile which is a diff new -> mirror""" + diff = Inc.get_inc_ext(incpref, "diff") + return Robust.chain([Rdiff.write_delta_action(new, mirror, diff), + Robust.copy_attribs_action(mirror, diff)]) + + def makedir_action(mirrordir, incpref): + """Make file indicating directory mirrordir has changed""" + dirsign = Inc.get_inc_ext(incpref, "dir") + def final(): + dirsign.touch() + RPath.copy_attribs(mirrordir, dirsign) + return RobustAction(lambda: None, final, dirsign.delete) + + def get_inc_ext(rp, typestr): + """Return RPath/DSRPath like rp but with inc/time extension + + If the file exists, then probably a previous backup has been + aborted. We then keep asking FindTime to get a time later + than the one that already has an inc file. 
+ + """ + def get_newinc(timestr): + """Get new increment rp with given time suffix""" + addtostr = lambda s: "%s.%s.%s" % (s, timestr, typestr) + if rp.index: + return rp.__class__(rp.conn, rp.base, rp.index[:-1] + + (addtostr(rp.index[-1]),)) + else: return rp.__class__(rp.conn, addtostr(rp.base), rp.index) + + inctime = 0 + while 1: + inctime = Resume.FindTime(rp.index, inctime) + incrp = get_newinc(Time.timetostring(inctime)) + if not incrp.lstat(): return incrp + + def make_patch_increment_ITR(inc_rpath, initial_state = None): + """Return IterTreeReducer that patches and increments + + This has to be an ITR because directories that have files in + them changed are flagged with an increment marker. There are + four possibilities as to the order: + + 1. Normal file -> Normal file: right away + 2. Directory -> Directory: wait until files in the directory + are processed, as we won't know whether to add a marker + until the end. + 3. Normal file -> Directory: right away, so later files will + have a directory to go into. + 4. Directory -> Normal file: Wait until the end, so we can + process all the files in the directory. + + """ + def base_init(indexed_tuple): + """Patch if appropriate, return (a,b) tuple + + a is true if found directory and thus didn't take action + + if a is false, b is true if some changes were made + + if a is true, b is the rp of a temporary file used to hold + the diff_rorp's data (for dir -> normal file change), and + false if none was necessary. + + """ + diff_rorp, dsrp = indexed_tuple + incpref = inc_rpath.new_index(indexed_tuple.index) + if dsrp.isdir(): return init_dir(dsrp, diff_rorp, incpref) + else: return init_non_dir(dsrp, diff_rorp, incpref) + + def init_dir(dsrp, diff_rorp, incpref): + """Initial processing of a directory + + Make the corresponding directory right away, but wait + until the end to write the replacement. However, if the + diff_rorp contains data, we must write it locally before + continuing, or else that data will be lost in the stream. + + """ + if not (incpref.lstat() and incpref.isdir()): incpref.mkdir() + if diff_rorp and diff_rorp.isreg() and diff_rorp.file: + tf = TempFileManager.new(dsrp) + RPathStatic.copy_with_attribs(diff_rorp, tf) + tf.set_attached_filetype(diff_rorp.get_attached_filetype()) + return (1, tf) + else: return (1, None) + + def init_non_dir(dsrp, diff_rorp, incpref): + """Initial processing of non-directory + + If a reverse diff is called for it is generated by apply + the forwards diff first on a temporary file. 
+ + """ + if diff_rorp: + if dsrp.isreg() and diff_rorp.isreg(): + tf = TempFileManager.new(dsrp) + def init_thunk(): + Rdiff.patch_with_attribs_action(dsrp, diff_rorp, + tf).execute() + Inc.Increment_action(tf, dsrp, incpref).execute() + Robust.make_tf_robustaction(init_thunk, (tf,), + (dsrp,)).execute() + else: + Robust.chain([Inc.Increment_action(diff_rorp, dsrp, + incpref), + RORPIter.patchonce_action( + None, dsrp, diff_rorp)]).execute() + return (None, 1) + return (None, None) + + def base_final(base_tuple, base_init_tuple, changed): + """Patch directory if not done, return true iff made change""" + if base_init_tuple[0]: # was directory + diff_rorp, dsrp = base_tuple + if changed or diff_rorp: + if base_init_tuple[1]: diff_rorp = base_init_tuple[1] + Inc.Increment(diff_rorp, dsrp, + inc_rpath.new_index(base_tuple.index)) + if diff_rorp: + RORPIter.patchonce_action(None, dsrp, + diff_rorp).execute() + if isinstance(diff_rorp, TempFile): diff_rorp.delete() + return 1 + return None + else: # changed iff base_init_tuple says it was + return base_init_tuple[1] + + return IterTreeReducer(base_init, lambda x,y: x or y, None, + base_final, initial_state) + +MakeStatic(Inc) diff --git a/rdiff-backup/src/iterfile.py b/rdiff-backup/src/iterfile.py new file mode 100644 index 0000000..21629b2 --- /dev/null +++ b/rdiff-backup/src/iterfile.py @@ -0,0 +1,235 @@ +execfile("ttime.py") +import cPickle + +####################################################################### +# +# iterfile - Convert an iterator to a file object and vice-versa +# + +class IterFileException(Exception): pass + +class UnwrapFile: + """Contains some basic methods for parsing a file containing an iter""" + def __init__(self, file): + self.file = file + + def _s2l(self, s): + """Convert string to long int""" + assert len(s) == 7 + l = 0L + for i in range(7): l = l*256 + ord(s[i]) + return l + + def _get(self): + """Return pair (type, data) next in line on the file + + type is a single character which is either "o" for object, "f" + for file, "c" for a continution of a file, or None if no more + data can be read. Data is either the file's data, if type is + "c" or "f", or the actual object if the type is "o". + + """ + header = self.file.read(8) + if not header: return None, None + assert len(header) == 8, "Header is only %d bytes" % len(header) + type, length = header[0], self._s2l(header[1:]) + buf = self.file.read(length) + if type == "o": return type, cPickle.loads(buf) + else: return type, buf + + +class IterWrappingFile(UnwrapFile): + """An iterator generated from a file. + + Initialize with a file type object, and then it will return the + elements of the file in order. + + """ + def __init__(self, file): + UnwrapFile.__init__(self, file) + self.currently_in_file = None + + def __iter__(self): return self + + def next(self): + if self.currently_in_file: + self.currently_in_file.close() # no error checking by this point + type, data = self._get() + if not type: raise StopIteration + if type == "o": return data + elif type == "f": + file = IterVirtualFile(self, data) + if data: self.currently_in_file = file + else: self.currently_in_file = None + return file + else: raise IterFileException("Bad file type %s" % type) + + +class IterVirtualFile(UnwrapFile): + """Another version of a pretend file + + This is returned by IterWrappingFile when a file is embedded in + the main file that the IterWrappingFile is based around. 
+ + """ + def __init__(self, iwf, initial_data): + """Initializer + + initial_data is the data from the first block of the file. + iwf is the iter wrapping file that spawned this + IterVirtualFile. + + """ + UnwrapFile.__init__(self, iwf.file) + self.iwf = iwf + self.bufferlist = [initial_data] + self.bufferlen = len(initial_data) + self.closed = None + + def check_consistency(self): + l = len("".join(self.bufferlist)) + assert l == self.bufferlen, \ + "Length of IVF bufferlist doesn't match (%s, %s)" % \ + (l, self.bufferlen) + + def read(self, length): + assert not self.closed + if self.iwf.currently_in_file: + while length >= self.bufferlen: + if not self.addtobuffer(): break + + real_len = min(length, self.bufferlen) + combined_buffer = "".join(self.bufferlist) + assert len(combined_buffer) == self.bufferlen, \ + (len(combined_buffer), self.bufferlen) + self.bufferlist = [combined_buffer[real_len:]] + self.bufferlen = self.bufferlen - real_len + return combined_buffer[:real_len] + + def addtobuffer(self): + """Read a chunk from the file and add it to the buffer""" + assert self.iwf.currently_in_file + type, data = self._get() + assert type == "c", "Type is %s instead of c" % type + if data: + self.bufferlen = self.bufferlen + len(data) + self.bufferlist.append(data) + return 1 + else: + self.iwf.currently_in_file = None + return None + + def close(self): + """Currently just reads whats left and discards it""" + while self.iwf.currently_in_file: + self.addtobuffer() + self.bufferlist = [] + self.bufferlen = 0 + self.closed = 1 + + +class FileWrappingIter: + """A file interface wrapping around an iterator + + This is initialized with an iterator, and then converts it into a + stream of characters. The object will evaluate as little of the + iterator as is necessary to provide the requested bytes. + + The actual file is a sequence of marshaled objects, each preceded + by 8 bytes which identifies the following the type of object, and + specifies its length. File objects are not marshalled, but the + data is written in chunks of Globals.blocksize, and the following + blocks can identify themselves as continuations. + + """ + def __init__(self, iter): + """Initialize with iter""" + self.iter = iter + self.bufferlist = [] + self.bufferlen = 0L + self.currently_in_file = None + self.closed = None + + def read(self, length): + """Return next length bytes in file""" + assert not self.closed + while self.bufferlen < length: + if not self.addtobuffer(): break + + combined_buffer = "".join(self.bufferlist) + assert len(combined_buffer) == self.bufferlen + real_len = min(self.bufferlen, length) + self.bufferlen = self.bufferlen - real_len + self.bufferlist = [combined_buffer[real_len:]] + return combined_buffer[:real_len] + + def addtobuffer(self): + """Updates self.bufferlist and self.bufferlen, adding on a chunk + + Returns None if we have reached the end of the iterator, + otherwise return true. 
+ + """ + if self.currently_in_file: + buf = "c" + self.addfromfile() + else: + try: currentobj = self.iter.next() + except StopIteration: return None + if hasattr(currentobj, "read") and hasattr(currentobj, "close"): + self.currently_in_file = currentobj + buf = "f" + self.addfromfile() + else: + pickle = cPickle.dumps(currentobj, 1) + buf = "o" + self._l2s(len(pickle)) + pickle + + self.bufferlist.append(buf) + self.bufferlen = self.bufferlen + len(buf) + return 1 + + def addfromfile(self): + """Read a chunk from the current file and return it""" + buf = self.currently_in_file.read(Globals.blocksize) + if not buf: + assert not self.currently_in_file.close() + self.currently_in_file = None + return self._l2s(len(buf)) + buf + + def _l2s(self, l): + """Convert long int to string of 7 characters""" + s = "" + for i in range(7): + l, remainder = divmod(l, 256) + s = chr(remainder) + s + assert remainder == 0 + return s + + def close(self): self.closed = 1 + + +class BufferedRead: + """Buffer the .read() calls to the given file + + This is used to lessen overhead and latency when a file is sent + over a connection. + + """ + def __init__(self, file): + self.file = file + self.buffer = "" + self.bufsize = Globals.conn_bufsize + + def read(self, l = -1): + if l < 0: # Read as much as possible + result = self.buffer + self.file.read() + self.buffer = "" + return result + + if len(self.buffer) < l: # Try to make buffer as long as l + self.buffer += self.file.read(max(self.bufsize, + l - len(self.buffer))) + actual_size = min(l, len(self.buffer)) + result = self.buffer[:actual_size] + self.buffer = self.buffer[actual_size:] + return result + + def close(self): return self.file.close() diff --git a/rdiff-backup/src/lazy.py b/rdiff-backup/src/lazy.py new file mode 100644 index 0000000..28e92c3 --- /dev/null +++ b/rdiff-backup/src/lazy.py @@ -0,0 +1,343 @@ +from __future__ import generators +execfile("static.py") +import os, stat, types + +####################################################################### +# +# lazy - Define some lazy data structures and functions acting on them +# + +class Iter: + """Hold static methods for the manipulation of lazy iterators""" + + def filter(predicate, iterator): + """Like filter in a lazy functional programming language""" + for i in iterator: + if predicate(i): yield i + + def map(function, iterator): + """Like map in a lazy functional programming language""" + for i in iterator: yield function(i) + + def foreach(function, iterator): + """Run function on each element in iterator""" + for i in iterator: function(i) + + def cat(*iters): + """Lazily concatenate iterators""" + for iter in iters: + for i in iter: yield i + + def cat2(iter_of_iters): + """Lazily concatenate iterators, iterated by big iterator""" + for iter in iter_of_iters: + for i in iter: yield i + + def empty(iter): + """True if iterator has length 0""" + for i in iter: return None + return 1 + + def equal(iter1, iter2, verbose = None, operator = lambda x, y: x == y): + """True if iterator 1 has same elements as iterator 2 + + Use equality operator, or == if it is unspecified. + + """ + for i1 in iter1: + try: i2 = iter2.next() + except StopIteration: + if verbose: print "End when i1 = %s" % i1 + return None + if not operator(i1, i2): + if verbose: print "%s not equal to %s" % (i1, i2) + return None + try: i2 = iter2.next() + except StopIteration: return 1 + if verbose: print "End when i2 = %s" % i2 + return None + + def Or(iter): + """True if any element in iterator is true. 
Short circuiting""" + i = None + for i in iter: + if i: return i + return i + + def And(iter): + """True if all elements in iterator are true. Short circuiting""" + i = 1 + for i in iter: + if not i: return i + return i + + def len(iter): + """Return length of iterator""" + i = 0 + while 1: + try: iter.next() + except StopIteration: return i + i = i+1 + + def foldr(f, default, iter): + """foldr the "fundamental list recursion operator"?""" + try: next = iter.next() + except StopIteration: return default + return f(next, Iter.foldr(f, default, iter)) + + def foldl(f, default, iter): + """the fundamental list iteration operator..""" + while 1: + try: next = iter.next() + except StopIteration: return default + default = f(default, next) + + def multiplex(iter, num_of_forks, final_func = None, closing_func = None): + """Split a single iterater into a number of streams + + The return val will be a list with length num_of_forks, each + of which will be an iterator like iter. final_func is the + function that will be called on each element in iter just as + it is being removed from the buffer. closing_func is called + when all the streams are finished. + + """ + if num_of_forks == 2 and not final_func and not closing_func: + im2 = IterMultiplex2(iter) + return (im2.yielda(), im2.yieldb()) + if not final_func: final_func = lambda i: None + if not closing_func: closing_func = lambda: None + + # buffer is a list of elements that some iterators need and others + # don't + buffer = [] + + # buffer[forkposition[i]] is the next element yieled by iterator + # i. If it is -1, yield from the original iter + starting_forkposition = [-1] * num_of_forks + forkposition = starting_forkposition[:] + called_closing_func = [None] + + def get_next(fork_num): + """Return the next element requested by fork_num""" + if forkposition[fork_num] == -1: + try: buffer.insert(0, iter.next()) + except StopIteration: + # call closing_func if necessary + if (forkposition == starting_forkposition and + not called_closing_func[0]): + closing_func() + called_closing_func[0] = None + raise StopIteration + for i in range(num_of_forks): forkposition[i] += 1 + + return_val = buffer[forkposition[fork_num]] + forkposition[fork_num] -= 1 + + blen = len(buffer) + if not (blen-1) in forkposition: + # Last position in buffer no longer needed + assert forkposition[fork_num] == blen-2 + final_func(buffer[blen-1]) + del buffer[blen-1] + return return_val + + def make_iterator(fork_num): + while(1): yield get_next(fork_num) + + return tuple(map(make_iterator, range(num_of_forks))) + +MakeStatic(Iter) + + +class IterMultiplex2: + """Multiplex an iterator into 2 parts + + This is a special optimized case of the Iter.multiplex function, + used when there is no closing_func or final_func, and we only want + to split it into 2. By profiling, this is a time sensitive class. 
+ + """ + def __init__(self, iter): + self.a_leading_by = 0 # How many places a is ahead of b + self.buffer = [] + self.iter = iter + + def yielda(self): + """Return first iterator""" + buf, iter = self.buffer, self.iter + while(1): + if self.a_leading_by >= 0: # a is in front, add new element + elem = iter.next() # exception will be passed + buf.append(elem) + else: elem = buf.pop(0) # b is in front, subtract an element + self.a_leading_by += 1 + yield elem + + def yieldb(self): + """Return second iterator""" + buf, iter = self.buffer, self.iter + while(1): + if self.a_leading_by <= 0: # b is in front, add new element + elem = iter.next() # exception will be passed + buf.append(elem) + else: elem = buf.pop(0) # a is in front, subtract an element + self.a_leading_by -= 1 + yield elem + + +class IterTreeReducer: + """Tree style reducer object for iterator + + The indicies of a RORPIter form a tree type structure. This class + can be used on each element of an iter in sequence and the result + will be as if the corresponding tree was reduced. This tries to + bridge the gap between the tree nature of directories, and the + iterator nature of the connection between hosts and the temporal + order in which the files are processed. + + The elements of the iterator are required to have a tuple-style + .index, called "indexed elem" below. + + """ + def __init__(self, base_init, branch_reducer, + branch_base, base_final, initial_state = None): + """ITR initializer + + base_init is a function of one argument, an indexed elem. It + is called immediately on any elem in the iterator. It should + return some value type A. + + branch_reducer and branch_base are used to form a value on a + bunch of reduced branches, in the way that a linked list of + type C can be folded to form a value type B. + + base_final is called when leaving a tree. It takes three + arguments, the indexed elem, the output (type A) of base_init, + the output of branch_reducer on all the branches (type B) and + returns a value type C. + + """ + self.base_init = base_init + self.branch_reducer = branch_reducer + self.base_final = base_final + self.branch_base = branch_base + + if initial_state: self.setstate(initial_state) + else: + self.state = IterTreeReducerState(branch_base) + self.subreducer = None + + def setstate(self, state): + """Update with new state, recursive if necessary""" + self.state = state + if state.substate: self.subreducer = self.newinstance(state.substate) + else: self.subreducer = None + + def getstate(self): return self.state + + def getresult(self): + """Return results of calculation""" + if not self.state.calculated: self.calculate_final_val() + return self.state.final_val + + def intree(self, index): + """Return true if index is still in current tree""" + return self.state.base_index == index[:len(self.state.base_index)] + + def newinstance(self, state = None): + """Return reducer of same type as self + + If state is None, sets substate of self.state, otherwise + assume this is already set. 
+ + """ + new = self.__class__(self.base_init, self.branch_reducer, + self.branch_base, self.base_final, state) + if state is None: self.state.substate = new.state + return new + + def process_w_subreducer(self, indexed_elem): + """Give object to subreducer, if necessary update branch_val""" + if not self.subreducer: + self.subreducer = self.newinstance() + if not self.subreducer(indexed_elem): + self.state.branch_val = self.branch_reducer(self.state.branch_val, + self.subreducer.getresult()) + self.subreducer = self.newinstance() + assert self.subreducer(indexed_elem) + + def calculate_final_val(self): + """Set final value""" + if self.subreducer: + self.state.branch_val = self.branch_reducer(self.state.branch_val, + self.subreducer.getresult()) + if self.state.current_index is None: + # No input, set None as default value + self.state.final_val = None + else: + self.state.final_val = self.base_final(self.state.base_elem, + self.state.base_init_val, + self.state.branch_val) + self.state.calculated = 1 + + def __call__(self, indexed_elem): + """Process elem, current position in iterator + + Returns true if elem successfully processed, false if elem is + not in the current tree and thus the final result is + available. + + """ + index = indexed_elem.index + assert type(index) is types.TupleType + + if self.state.current_index is None: # must be at base + self.state.base_init_val = self.base_init(indexed_elem) + # Do most crash-prone op first, so we don't leave inconsistent + self.state.current_index = index + self.state.base_index = index + self.state.base_elem = indexed_elem + return 1 + elif not index > self.state.current_index: + Log("Warning: oldindex %s >= newindex %s" % + (self.state.current_index, index), 2) + + if not self.intree(index): + self.calculate_final_val() + return None + else: + self.process_w_subreducer(indexed_elem) + self.state.current_index = index + return 1 + + +class IterTreeReducerState: + """Holds the state for IterTreeReducers + + An IterTreeReducer cannot be pickled directly because it holds + some anonymous functions. This class contains the relevant data + that is likely to be picklable, so the ITR can be saved and loaded + if the associated functions are known. 
+ + """ + def __init__(self, branch_base): + """ITRS initializer + + Class variables: + self.current_index - last index processing started on, or None + self.base_index - index of first element processed + self.base_elem - first element processed + self.branch_val - default branch reducing value + + self.calculated - true iff the final value has been calculated + self.base_init_val - return value of base_init function + self.final_val - Final value once it's calculated + self.substate - IterTreeReducerState when subreducer active + + """ + self.current_index = None + self.calculated = None + self.branch_val = branch_base + self.substate = None + diff --git a/rdiff-backup/src/log.py b/rdiff-backup/src/log.py new file mode 100644 index 0000000..5416fd2 --- /dev/null +++ b/rdiff-backup/src/log.py @@ -0,0 +1,142 @@ +import time, sys +execfile("lazy.py") + +####################################################################### +# +# log - Manage logging +# + +class LoggerError(Exception): pass + +class Logger: + """All functions which deal with logging""" + def __init__(self): + self.log_file_open = None + self.log_file_local = None + self.verbosity = self.term_verbosity = 3 + # termverbset is true if the term_verbosity has been explicity set + self.termverbset = None + + def setverbosity(self, verbosity_string): + """Set verbosity levels. Takes a number string""" + try: self.verbosity = int(verbosity_string) + except ValueError: + Log.FatalError("Verbosity must be a number, received '%s' " + "instead." % verbosity_string) + if not self.termverbset: self.term_verbosity = self.verbosity + + def setterm_verbosity(self, termverb_string): + """Set verbosity to terminal. Takes a number string""" + try: self.term_verbosity = int(termverb_string) + except ValueError: + Log.FatalError("Terminal verbosity must be a number, received " + "'%s' insteaxd." % termverb_string) + self.termverbset = 1 + + def open_logfile(self, rpath): + """Inform all connections of an open logfile. + + rpath.conn will write to the file, and the others will pass + write commands off to it. 
+ + """ + for conn in Globals.connections: + conn.Log.open_logfile_allconn(rpath.conn) + rpath.conn.Log.open_logfile_local(rpath) + + def open_logfile_allconn(self, log_file_conn): + """Run on all connections to signal log file is open""" + self.log_file_open = 1 + self.log_file_conn = log_file_conn + + def open_logfile_local(self, rpath): + """Open logfile locally - should only be run on one connection""" + assert self.log_file_conn is Globals.local_connection + self.log_file_local = 1 + self.logrp = rpath + self.logfp = rpath.open("a") + + def close_logfile(self): + """Close logfile and inform all connections""" + if self.log_file_open: + for conn in Globals.connections: + conn.Log.close_logfile_allconn() + self.log_file_conn.Log.close_logfile_local() + + def close_logfile_allconn(self): + """Run on every connection""" + self.log_file_open = None + + def close_logfile_local(self): + """Run by logging connection - close logfile""" + assert self.log_file_conn is Globals.local_connection + assert not self.logfp.close() + + def format(self, message, verbosity): + """Format the message, possibly adding date information""" + if verbosity < 9: return message + "\n" + else: return "%s %s\n" % (time.asctime(time.localtime(time.time())), + message) + + def __call__(self, message, verbosity): + """Log message that has verbosity importance""" + if verbosity <= self.verbosity: self.log_to_file(message) + if verbosity <= self.term_verbosity: + self.log_to_term(message, verbosity) + + def log_to_file(self, message): + """Write the message to the log file, if possible""" + if self.log_file_open: + if self.log_file_local: + self.logfp.write(self.format(message, self.verbosity)) + else: self.log_file_conn.Log.log_to_file(message) + + def log_to_term(self, message, verbosity): + """Write message to stdout/stderr""" + if verbosity <= 2 or Globals.server: termfp = sys.stderr + else: termfp = sys.stdout + termfp.write(self.format(message, self.term_verbosity)) + + def conn(self, direction, result, req_num): + """Log some data on the connection + + The main worry with this function is that something in here + will create more network traffic, which will spiral to + infinite regress. So, for instance, logging must only be done + to the terminal, because otherwise the log file may be remote. + + """ + if self.term_verbosity < 9: return + if type(result) is types.StringType: result_repr = repr(result) + else: result_repr = str(result) + if Globals.server: conn_str = "Server" + else: conn_str = "Client" + self.log_to_term("%s %s (%d): %s" % + (conn_str, direction, req_num, result_repr), 9) + + def FatalError(self, message): + self("Fatal Error: " + message, 1) + Globals.Main.cleanup() + sys.exit(1) + + def exception(self, only_terminal = 0): + """Log an exception and traceback at verbosity 2 + + If only_terminal is None, log normally. If it is 1, then only + log to disk if log file is local (self.log_file_open = 1). If + it is 2, don't log to disk at all. 
+ + """ + assert only_terminal in (0, 1, 2) + if (only_terminal == 0 or + (only_terminal == 1 and self.log_file_open)): + logging_func = self.__call__ + else: logging_func = self.log_to_term + + exc_info = sys.exc_info() + logging_func("Exception %s raised of class %s" % + (exc_info[1], exc_info[0]), 2) + logging_func("".join(traceback.format_tb(exc_info[2])), 2) + + +Log = Logger() diff --git a/rdiff-backup/src/main.py b/rdiff-backup/src/main.py new file mode 100755 index 0000000..24455f6 --- /dev/null +++ b/rdiff-backup/src/main.py @@ -0,0 +1,401 @@ +#!/usr/bin/python + +execfile("highlevel.py") +import getopt, sys, re + +####################################################################### +# +# main - Start here: Read arguments, set global settings, etc. +# + +class Main: + def __init__(self): + self.action = None + self.remote_cmd, self.remote_schema = None, None + self.force = None + self.exclude_regstrs = ["/proc"] + self.exclude_mirror_regstrs = [] + + def parse_cmdlineoptions(self): + """Parse argument list and set global preferences""" + try: optlist, self.args = getopt.getopt(sys.argv[1:], "blmv:Vs", + ["backup-mode", "version", "verbosity=", "exclude=", + "exclude-mirror=", "server", "test-server", + "remote-cmd=", "mirror-only", "force", + "change-source-perms", "list-increments", + "remove-older-than=", "remote-schema=", + "include-from-stdin", "terminal-verbosity=", + "exclude-device-files", "resume", "no-resume", + "resume-window=", "windows-time-format", + "checkpoint-interval="]) + except getopt.error: + self.commandline_error("Error parsing commandline options") + + for opt, arg in optlist: + if opt == "-b" or opt == "--backup-mode": self.action = "backup" + elif opt == "--change-source-perms": + Globals.set('change_source_perms', 1) + elif opt == "--checkpoint-interval": + Globals.set_integer('checkpoint_interval', arg) + elif opt == "--exclude": self.exclude_regstrs.append(arg) + elif opt == "--exclude-device-files": + Globals.set('exclude_device_files', 1) + elif opt == "--exclude-mirror": + self.exclude_mirror_regstrs.append(arg) + elif opt == "--force": self.force = 1 + elif opt == "--include-from-stdin": Globals.include_from_stdin = 1 + elif opt == "-l" or opt == "--list-increments": + self.action = "list-increments" + elif opt == "-m" or opt == "--mirror-only": self.action = "mirror" + elif opt == '--no-resume': Globals.resume = 0 + elif opt == "--remote-cmd": self.remote_cmd = arg + elif opt == "--remote-schema": self.remote_schema = arg + elif opt == "--remove-older-than": + self.remove_older_than_string = arg + self.action = "remove-older-than" + elif opt == '--resume': Globals.resume = 1 + elif opt == '--resume-window': + Globals.set_integer('resume_window', arg) + elif opt == "-s" or opt == "--server": self.action = "server" + elif opt == "--terminal-verbosity": + Log.setterm_verbosity(arg) + elif opt == "--test-server": self.action = "test-server" + elif opt == "-V" or opt == "--version": + print "rdiff-backup " + Globals.version + sys.exit(0) + elif opt == "-v" or opt == "--verbosity": + Log.setverbosity(arg) + elif opt == '--windows-time-format': + Globals.set('time_separator', "_") + else: Log.FatalError("Unknown option %s" % opt) + + def set_action(self): + """Check arguments and try to set self.action""" + l = len(self.args) + if not self.action: + if l == 0: self.commandline_error("No arguments given") + elif l == 1: self.action = "restore" + elif l == 2: + if RPath(Globals.local_connection, self.args[0]).isincfile(): + self.action = "restore" + 
else: self.action = "backup" + else: self.commandline_error("Too many arguments given") + + if l == 0 and self.action != "server" and self.action != "test-server": + self.commandline_error("No arguments given") + if l > 0 and self.action == "server": + self.commandline_error("Too many arguments given") + if l < 2 and (self.action == "backup" or self.action == "mirror"): + self.commandline_error("Two arguments are required " + "(source, destination).") + if l == 2 and (self.action == "list-increments" or + self.action == "remove-older-than"): + self.commandline_error("Only use one argument, " + "the root of the backup directory") + if l > 2: self.commandline_error("Too many arguments given") + + def commandline_error(self, message): + sys.stderr.write("Error: %s\n" % message) + sys.stderr.write("See the rdiff-backup manual page for instructions\n") + sys.exit(1) + + def misc_setup(self, rps): + """Set default change ownership flag, umask, excludes""" + if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or + (len(rps) < 2 and os.getuid() == 0)): + # Allow change_ownership if destination connection is root + for conn in Globals.connections: + conn.Globals.set('change_ownership', 1) + for rp in rps: rp.setdata() # Update with userinfo + + os.umask(077) + for regex_string in self.exclude_regstrs: + Globals.add_regexp(regex_string, None) + for regex_string in self.exclude_mirror_regstrs: + Globals.add_regexp(regex_string, 1) + + def take_action(self, rps): + """Do whatever self.action says""" + if self.action == "server": + PipeConnection(sys.stdin, sys.stdout).Server() + elif self.action == "backup": self.Backup(rps[0], rps[1]) + elif self.action == "restore": apply(self.Restore, rps) + elif self.action == "mirror": self.Mirror(rps[0], rps[1]) + elif self.action == "test-server": + SetConnections.TestConnections() + elif self.action == "list-increments": + self.ListIncrements(rps[0]) + elif self.action == "remove-older-than": + self.RemoveOlderThan(rps[0]) + else: raise AssertionError("Unknown action " + self.action) + + def cleanup(self): + """Do any last minute cleaning before exiting""" + Log("Cleaning up", 6) + Log.close_logfile() + if not Globals.server: SetConnections.CloseConnections() + + def Main(self): + """Start everything up!""" + self.parse_cmdlineoptions() + self.set_action() + rps = SetConnections.InitRPs(self.args, + self.remote_schema, self.remote_cmd) + self.misc_setup(rps) + self.take_action(rps) + self.cleanup() + + + def Mirror(self, src_rp, dest_rp): + """Turn dest_path into a copy of src_path""" + Log("Mirroring %s to %s" % (src_rp.path, dest_rp.path), 5) + self.mirror_check_paths(src_rp, dest_rp) + HighLevel.Mirror(src_rp, dest_rp, None) # No checkpointing - no rbdir + + def mirror_check_paths(self, rpin, rpout): + """Check paths and return rpin, rpout""" + if not rpin.lstat(): + Log.FatalError("Source directory %s does not exist" % rpin.path) + if rpout.lstat() and not self.force: + Log.FatalError( +"""Destination %s exists so continuing could mess it up. 
Run +rdiff-backup with the --force option if you want to mirror anyway.""" % + rpout.path) + + + def Backup(self, rpin, rpout): + """Backup, possibly incrementally, src_path to dest_path.""" + SetConnections.BackupInitConnections(rpin.conn, rpout.conn) + self.backup_init_dirs(rpin, rpout) + Time.setcurtime() + RSI = Resume.ResumeCheck() + if self.prevtime: + Time.setprevtime(self.prevtime) + SaveState.init_filenames(1) + HighLevel.Mirror_and_increment(rpin, rpout, self.incdir, RSI) + else: + SaveState.init_filenames(None) + HighLevel.Mirror(rpin, rpout, 1, RSI) + self.backup_touch_curmirror(rpin, rpout) + + def backup_init_dirs(self, rpin, rpout): + """Make sure rpin and rpout are valid, init data dir and logging""" + if rpout.lstat() and not rpout.isdir(): + if not self.force: + Log.FatalError("Destination %s exists and is not a " + "directory" % rpout.path) + else: + Log("Deleting %s" % rpout.path, 3) + rpout.delete() + + if not rpin.lstat(): + Log.FatalError("Source directory %s does not exist" % rpin.path) + elif not rpin.isdir(): + Log.FatalError("Source %s is not a directory" % rpin.path) + + self.datadir = rpout.append("rdiff-backup-data") + SetConnections.UpdateGlobal('rbdir', self.datadir) + self.incdir = RPath(rpout.conn, os.path.join(self.datadir.path, + "increments")) + self.prevtime = self.backup_get_mirrortime() + + if rpout.lstat() and not self.datadir.lstat() and not self.force: + Log.FatalError( +"""Destination directory %s exists, but does not look like a +rdiff-backup directory. Running rdiff-backup like this could mess up +what is currently in it. If you want to overwrite it, run +rdiff-backup with the --force option.""" % rpout.path) + + if not rpout.lstat(): + try: rpout.mkdir() + except os.error: + Log.FatalError("Unable to create directory %s" % rpout.path) + if not self.datadir.lstat(): self.datadir.mkdir() + Globals.add_regexp(self.datadir.path, 1) + Globals.add_regexp(rpin.append("rdiff-backup-data").path, None) + if Log.verbosity > 0: + Log.open_logfile(self.datadir.append("backup.log")) + self.backup_warn_if_infinite_regress(rpin, rpout) + + def backup_warn_if_infinite_regress(self, rpin, rpout): + """Warn user if destination area contained in source area""" + if rpout.conn is rpin.conn: # it's meaningful to compare paths + if ((len(rpout.path) > len(rpin.path)+1 and + rpout.path[:len(rpin.path)] == rpin.path and + rpout.path[len(rpin.path)] == '/') or + (rpin.path == "." and rpout.path[0] != '/' and + rpout.path[:2] != '..')): + # Just a few heuristics, we don't have to get every case + if not DestructiveStepping.isexcluded(rpout, 1): + Log( +"""Warning: The destination directory '%s' may be contained in the +source directory '%s'. This could cause an infinite regress. You +may need to use the --exclude option.""" % (rpout.path, rpin.path), 2) + + def backup_get_mirrorrps(self): + """Return list of current_mirror rps""" + if not self.datadir.isdir(): return [] + mirrorfiles = filter(lambda f: f.startswith("current_mirror."), + self.datadir.listdir()) + mirrorrps = map(lambda x: self.datadir.append(x), mirrorfiles) + return filter(lambda rp: rp.isincfile(), mirrorrps) + + def backup_get_mirrortime(self): + """Return time in seconds of previous mirror, or None if cannot""" + mirrorrps = self.backup_get_mirrorrps() + if not mirrorrps: return None + if len(mirrorrps) > 1: + Log( +"""Warning: duplicate current_mirror files found. Perhaps something +went wrong during your last backup? 
Using """ + mirrorrps[-1].path, 2) + + timestr = self.datadir.append(mirrorrps[-1].path).getinctime() + return Time.stringtotime(timestr) + + def backup_touch_curmirror(self, rpin, rpout): + """Make a file like current_mirror.time.snapshot to record time + + Also updates rpout so mod times don't get messed up. + + """ + map(RPath.delete, self.backup_get_mirrorrps()) + mirrorrp = self.datadir.append("current_mirror.%s.%s" % + (Time.curtimestr, "snapshot")) + Log("Touching mirror marker %s" % mirrorrp.path, 6) + mirrorrp.touch() + RPath.copy_attribs(rpin, rpout) + + + def Restore(self, src_rp, dest_rp = None): + """Main restoring function - take src_path to dest_path""" + Log("Starting Restore", 5) + rpin, rpout = self.restore_check_paths(src_rp, dest_rp) + inc_tup = self.restore_get_inctup(rpin) + mirror_base = self.restore_get_mirror(rpin) + rtime = Time.stringtotime(rpin.getinctime()) + Log.open_logfile(self.datadir.append("restore.log")) + HighLevel.Restore(rtime, mirror_base, inc_tup, rpout) + + def restore_check_paths(self, rpin, rpout): + """Check paths and return pair of corresponding rps""" + if not rpin.lstat(): + Log.FatalError("Increment file %s does not exist" % src_path) + if not rpin.isincfile(): + Log.FatalError("""File %s does not look like an increment file. + +Try restoring from an increment file (the filenames look like +"foobar.2001-09-01T04:49:04-07:00.diff").""") + + if not rpout: rpout = RPath(Globals.local_connection, + rpin.getincbase_str()) + if rpout.lstat(): + Log.FatalError("Restore target %s already exists. " + "Will not overwrite." % rpout.path) + return rpin, rpout + + def restore_get_inctup(self, rpin): + """Return increment tuple (incrp, list of incs)""" + rpin_dir = rpin.dirsplit()[0] + if not rpin_dir: rpin_dir = "/" + rpin_dir_rp = RPath(rpin.conn, rpin_dir) + incbase = rpin.getincbase() + incbasename = incbase.dirsplit()[1] + inclist = filter(lambda rp: rp.isincfile() and + rp.getincbase_str() == incbasename, + map(rpin_dir_rp.append, rpin_dir_rp.listdir())) + return IndexedTuple((), (incbase, inclist)) + + def restore_get_mirror(self, rpin): + """Return mirror file and set the data dir + + The idea here is to keep backing up on the path until we find + something named "rdiff-backup-data". Then use that as a + reference to calculate the oldfile. This could fail if the + increment file is pointed to in a funny way, using symlinks or + somesuch. + + """ + pathcomps = os.path.join(rpin.conn.os.getcwd(), + rpin.getincbase().path).split("/") + for i in range(1, len(pathcomps)): + datadirrp = RPath(rpin.conn, "/".join(pathcomps[:i+1])) + if pathcomps[i] == "rdiff-backup-data" and datadirrp.isdir(): + break + else: Log.FatalError("Unable to find rdiff-backup-data dir") + + self.datadir = datadirrp + Globals.add_regexp(self.datadir.path, 1) + rootrp = RPath(rpin.conn, "/".join(pathcomps[:i])) + if not rootrp.lstat(): + Log.FatalError("Root of mirror area %s does not exist" % + rootrp.path) + else: Log("Using root mirror %s" % rootrp.path, 6) + + from_datadir = pathcomps[i+1:] + if len(from_datadir) == 1: result = rootrp + elif len(from_datadir) > 1: + result = RPath(rootrp.conn, apply(os.path.join, + [rootrp.path] + from_datadir[1:])) + else: raise RestoreError("Problem finding mirror file") + + Log("Using mirror file %s" % result.path, 6) + return result + + + def ListIncrements(self, rootrp): + """Print out a summary of the increments and their times""" + datadir = self.li_getdatadir(rootrp, + """Unable to open rdiff-backup-data dir. 
+ +The argument to rdiff-backup -l or rdiff-backup --list-increments +should be the root of the target backup directory, of which +rdiff-backup-data is a subdirectory. So, if you ran + +rdiff-backup /home/foo /mnt/back/bar + +earlier, try: + +rdiff-backup -l /mnt/back/bar +""") + print Manage.describe_root_incs(datadir) + + def li_getdatadir(self, rootrp, errormsg): + """Return data dir if can find it, otherwise use errormsg""" + datadir = rootrp.append("rdiff-backup-data") + if not datadir.lstat() or not datadir.isdir(): + Log.FatalError(errormsg) + return datadir + + + def RemoveOlderThan(self, rootrp): + """Remove all increment files older than a certain time""" + datadir = self.li_getdatadir(rootrp, + """Unable to open rdiff-backup-data dir. + +Try finding the increments first using --list-increments.""") + time = self.rot_get_earliest_time() + timep = Time.timetopretty(time) + Log("Deleting increment(s) before %s" % timep, 4) + incobjs = filter(lambda x: x.time < time, Manage.get_incobjs(datadir)) + incobjs_time = ", ".join(map(IncObj.pretty_time, incobjs)) + if not incobjs: + Log.FatalError("No increments older than %s found" % timep) + elif len(incobjs) > 1 and not self.force: + Log.FatalError("Found %d relevant increments, dated %s.\n" + "If you want to delete multiple increments in this way, " + "use the --force." % (len(incobjs), incobjs_time)) + + Log("Deleting increment%sat %s" % (len(incobjs) == 1 and " " or "s ", + incobjs_time), 3) + Manage.delete_earlier_than(datadir, time) + + def rot_get_earliest_time(self): + """Return earliest time in seconds that will not be deleted""" + seconds = Time.intstringtoseconds(self.remove_older_than_string) + return time.time() - seconds + + + +if __name__ == "__main__": + Globals.Main = Main() + Globals.Main.Main() diff --git a/rdiff-backup/src/manage.py b/rdiff-backup/src/manage.py new file mode 100644 index 0000000..c0f4a85 --- /dev/null +++ b/rdiff-backup/src/manage.py @@ -0,0 +1,99 @@ +execfile("restore.py") + +####################################################################### +# +# manage - list, delete, and otherwise manage increments +# + +class ManageException(Exception): pass + +class Manage: + def get_incobjs(datadir): + """Return Increments objects given the rdiff-backup data directory""" + return map(IncObj, Manage.find_incrps_with_base(datadir, "increments")) + + def find_incrps_with_base(dir_rp, basename): + """Return list of incfiles with given basename in dir_rp""" + rps = map(dir_rp.append, dir_rp.listdir()) + incrps = filter(RPath.isincfile, rps) + result = filter(lambda rp: rp.getincbase_str() == basename, incrps) + Log("find_incrps_with_base: found %d incs" % len(result), 6) + return result + + def describe_root_incs(datadir): + """Return a string describing all the the root increments""" + result = [] + currentrps = Manage.find_incrps_with_base(datadir, "current_mirror") + if not currentrps: + Log("Warning: no current mirror marker found", 1) + elif len(currentrps) > 1: + Log("Warning: multiple mirror markers found", 1) + for rp in currentrps: + result.append("Found mirror marker %s" % rp.path) + result.append("Indicating latest mirror taken at %s" % + Time.stringtopretty(rp.getinctime())) + result.append("---------------------------------------------" + "-------------") + + # Sort so they are in reverse order by time + time_w_incobjs = map(lambda io: (-io.time, io), + Manage.get_incobjs(datadir)) + time_w_incobjs.sort() + incobjs = map(lambda x: x[1], time_w_incobjs) + result.append("Found %d increments:" % 
len(incobjs)) + result.append("\n------------------------------------------\n".join( + map(IncObj.full_description, incobjs))) + return "\n".join(result) + + def delete_earlier_than(baserp, time): + """Deleting increments older than time in directory baserp + + time is in seconds. It will then delete any empty directories + in the tree. To process the entire backup area, the + rdiff-backup-data directory should be the root of the tree. + + """ + def yield_files(rp): + yield rp + if rp.isdir(): + for filename in rp.listdir(): + for sub_rp in yield_files(rp.append(filename)): + yield sub_rp + + for rp in yield_files(baserp): + if ((rp.isincfile() and + Time.stringtotime(rp.getinctime()) < time) or + (rp.isdir() and not rp.listdir())): + Log("Deleting increment file %s" % rp.path, 5) + rp.delete() + +MakeStatic(Manage) + + +class IncObj: + """Increment object - represent a completed increment""" + def __init__(self, incrp): + """IncObj initializer + + incrp is an RPath of a path like increments.TIMESTR.dir + standing for the root of the increment. + + """ + if not incrp.isincfile(): + raise ManageException("%s is not an inc file" % incrp.path) + self.incrp = incrp + self.time = Time.stringtotime(incrp.getinctime()) + + def getbaserp(self): + """Return rp of the incrp without extensions""" + return self.incrp.getincbase() + + def pretty_time(self): + """Return a formatted version of inc's time""" + return Time.timetopretty(self.time) + + def full_description(self): + """Return string describing increment""" + s = ["Increment file %s" % self.incrp.path, + "Date: %s" % self.pretty_time()] + return "\n".join(s) diff --git a/rdiff-backup/src/rdiff.py b/rdiff-backup/src/rdiff.py new file mode 100644 index 0000000..c27d4f2 --- /dev/null +++ b/rdiff-backup/src/rdiff.py @@ -0,0 +1,175 @@ +execfile("rlist.py") +import os, popen2 + +####################################################################### +# +# rdiff - Invoke rdiff utility to make signatures, deltas, or patch +# + +class RdiffException(Exception): pass + +class Rdiff: + """Contains static methods for rdiff operations + + All these operations should be done in a relatively safe manner + using RobustAction and the like. 
+ + """ + def get_signature(rp): + """Take signature of rpin file and return in file object""" + Log("Getting signature of %s" % rp.path, 7) + return rp.conn.RdiffPopen(['rdiff', 'signature', rp.path]) + + def get_delta_sigfileobj(sig_fileobj, rp_new): + """Like get_delta but signature is in a file object""" + sig_tf = TempFileManager.new(rp_new, None) + sig_tf.write_from_fileobj(sig_fileobj) + rdiff_popen_obj = Rdiff.get_delta(sig_tf, rp_new) + rdiff_popen_obj.set_thunk(sig_tf.delete) + return rdiff_popen_obj + + def get_delta(rp_signature, rp_new): + """Take signature rp and new rp, return delta file object""" + assert rp_signature.conn is rp_new.conn + Log("Getting delta of %s with signature %s" % + (rp_new.path, rp_signature.path), 7) + return rp_new.conn.RdiffPopen(['rdiff', 'delta', + rp_signature.path, rp_new.path]) + + def write_delta_action(basis, new, delta): + """Return action writing delta which brings basis to new""" + sig_tf = TempFileManager.new(new, None) + delta_tf = TempFileManager.new(delta) + def init(): + Log("Writing delta %s from %s -> %s" % + (basis.path, new.path, delta.path), 7) + sig_tf.write_from_fileobj(Rdiff.get_signature(basis)) + delta_tf.write_from_fileobj(Rdiff.get_delta(sig_tf, new)) + sig_tf.delete() + return Robust.make_tf_robustaction(init, (sig_tf, delta_tf), + (None, delta)) + + def write_delta(basis, new, delta): + """Write rdiff delta which brings basis to new""" + Rdiff.write_delta_action(basis, new, delta).execute() + + def patch_action(rp_basis, rp_delta, rp_out = None, out_tf = None): + """Return RobustAction which patches rp_basis with rp_delta + + If rp_out is None, put output in rp_basis. Will use TempFile + out_tf it is specified. + + """ + if not rp_out: rp_out = rp_basis + else: assert rp_out.conn is rp_basis.conn + if not (isinstance(rp_delta, RPath) and isinstance(rp_basis, RPath) + and rp_basis.conn is rp_delta.conn): + return Rdiff.patch_fileobj_action(rp_basis, rp_delta.open('rb'), + rp_out, out_tf) + + if out_tf is None: out_tf = TempFileManager.new(rp_out) + def init(): + Log("Patching %s using %s to %s via %s" % + (rp_basis.path, rp_delta.path, rp_out.path, out_tf.path), 7) + cmdlist = ["rdiff", "patch", rp_basis.path, + rp_delta.path, out_tf.path] + return_val = rp_basis.conn.os.spawnvp(os.P_WAIT, 'rdiff', cmdlist) + out_tf.setdata() + if return_val != 0 or not out_tf.lstat(): + RdiffException("Error running %s" % cmdlist) + return Robust.make_tf_robustaction(init, (out_tf,), (rp_out,)) + + def patch_fileobj_action(rp_basis, delta_fileobj, + rp_out = None, out_tf = None): + """Like patch_action but diff is given in fileobj form + + Nest a writing of a tempfile with the actual patching to + create a new action. We have to nest so that the tempfile + will be around until the patching finishes. 
+
+        """
+        if not rp_out: rp_out = rp_basis
+        delta_tf = TempFileManager.new(rp_out, None)
+        def init(): delta_tf.write_from_fileobj(delta_fileobj)
+        return Robust.chain_nested([RobustAction(init, delta_tf.delete,
+                                                 lambda exp: delta_tf.delete),
+                                    Rdiff.patch_action(rp_basis, delta_tf,
+                                                       rp_out, out_tf)])
+
+    def patch_with_attribs_action(rp_basis, rp_delta, rp_out = None):
+        """Like patch_action, but also transfers attributes from rp_delta"""
+        if not rp_out: rp_out = rp_basis
+        tf = TempFileManager.new(rp_out)
+        return Robust.chain_nested(
+            [Rdiff.patch_action(rp_basis, rp_delta, rp_out, tf),
+             Robust.copy_attribs_action(rp_delta, tf)])
+
+    def copy_action(rpin, rpout):
+        """Use rdiff to copy rpin to rpout, conserving bandwidth"""
+        if not rpin.isreg() or not rpout.isreg() or rpin.conn is rpout.conn:
+            # rdiff not applicable, fall back to regular copying
+            return Robust.copy_action(rpin, rpout)
+
+        Log("Rdiff copying %s to %s" % (rpin.path, rpout.path), 6)
+        delta_tf = TempFileManager.new(rpout, None)
+        return Robust.chain(Rdiff.write_delta_action(rpout, rpin, delta_tf),
+                            Rdiff.patch_action(rpout, delta_tf),
+                            RobustAction(lambda: None, delta_tf.delete,
+                                         lambda exp: delta_tf.delete))
+
+MakeStatic(Rdiff)
+
+
+class RdiffPopen:
+    """Spawn process and treat stdout as file object
+
+    Instead of using popen, which evaluates arguments with the shell
+    and thus may lead to security holes (thanks to Jamie Heilman for
+    this point), use the popen2 class and discard stdin.
+
+    When closed, this object checks to make sure the process exited
+    cleanly, and executes closing_thunk.
+
+    """
+    def __init__(self, cmdlist, closing_thunk = None):
+        """RdiffPopen initializer
+
+        cmdlist is the list of argument strings to run, and
+        closing_thunk is called with no parameters right after the
+        file is closed.
+
+        """
+        assert type(cmdlist) is types.ListType
+        self.p3obj = popen2.Popen3(cmdlist)
+        self.fileobj = self.p3obj.fromchild
+        self.closing_thunk = closing_thunk
+        self.cmdlist = cmdlist
+
+    def set_thunk(self, closing_thunk):
+        """Set closing_thunk if not already set"""
+        assert not self.closing_thunk
+        self.closing_thunk = closing_thunk
+
+    def read(self, length = -1): return self.fileobj.read(length)
+
+    def close(self):
+        closeval = self.fileobj.close()
+        if self.closing_thunk: self.closing_thunk()
+        exitval = self.p3obj.poll()
+        if exitval == 0: return closeval
+        elif exitval == 256:
+            Log("Failure probably because %s couldn't be found in PATH."
+                % self.cmdlist[0], 2)
+            assert 0, "rdiff not found"
+        elif exitval == -1:
+            # There may be a race condition where a process closes
+            # but doesn't provide its exitval fast enough.
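+            # popen2.Popen3.poll() returns -1 while the child is still
+            # running, so sleep briefly and poll once more before
+            # giving up.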
+ Log("Waiting for process to close", 8) + time.sleep(0.2) + exitval = self.p3obj.poll() + if exitval == 0: return closeval + raise RdiffException("%s exited with non-zero value %d" % + (self.cmdlist, exitval)) + + + + diff --git a/rdiff-backup/src/restore.py b/rdiff-backup/src/restore.py new file mode 100644 index 0000000..1f7d24e --- /dev/null +++ b/rdiff-backup/src/restore.py @@ -0,0 +1,158 @@ +from __future__ import generators +execfile("increment.py") +import tempfile + +####################################################################### +# +# restore - Read increment files and restore to original +# + +class RestoreError(Exception): pass + +class Restore: + def RestoreFile(rest_time, rpbase, inclist, rptarget): + """Non-recursive restore function + + rest_time is the time in seconds to restore to, + rpbase is the base name of the file being restored, + inclist is a list of rpaths containing all the relevant increments, + and rptarget is the rpath that will be written with the restored file. + + """ + inclist = Restore.sortincseq(rest_time, inclist) + if not inclist and not (rpbase and rpbase.lstat()): + return # no increments were applicable + Log("Restoring %s with increments %s to %s" % + (rpbase and rpbase.path, + Restore.inclist2str(inclist), rptarget.path), 5) + if not inclist or inclist[0].getinctype() == "diff": + assert rpbase and rpbase.lstat(), \ + "No base to go with incs %s" % Restore.inclist2str(inclist) + RPath.copy_with_attribs(rpbase, rptarget) + for inc in inclist: Restore.applyinc(inc, rptarget) + + def inclist2str(inclist): + """Return string version of inclist for logging""" + return ",".join(map(lambda x: x.path, inclist)) + + def sortincseq(rest_time, inclist): + """Sort the inc sequence, and throw away irrelevant increments""" + incpairs = map(lambda rp: (Time.stringtotime(rp.getinctime()), rp), + inclist) + # Only consider increments at or after the time being restored + incpairs = filter(lambda pair: pair[0] >= rest_time, incpairs) + + # Now throw away older unnecessary increments + incpairs.sort() + i = 0 + while(i < len(incpairs)): + # Only diff type increments require later versions + if incpairs[i][1].getinctype() != "diff": break + i = i+1 + incpairs = incpairs[:i+1] + + # Return increments in reversed order + incpairs.reverse() + return map(lambda pair: pair[1], incpairs) + + def applyinc(inc, target): + """Apply increment rp inc to targetrp target""" + Log("Applying increment %s to %s" % (inc.path, target.path), 6) + inctype = inc.getinctype() + if inctype == "diff": + if not target.lstat(): + raise RestoreError("Bad increment sequence at " + inc.path) + Rdiff.patch_action(target, inc).execute() + elif inctype == "dir": + if not target.isdir(): + if target.lstat(): + raise RestoreError("File %s already exists" % target.path) + target.mkdir() + elif inctype == "missing": return + elif inctype == "snapshot": RPath.copy(inc, target) + else: raise RestoreError("Unknown inctype %s" % inctype) + RPath.copy_attribs(inc, target) + + def RestoreRecursive(rest_time, mirror_base, baseinc_tup, target_base): + """Recursive restore function. + + rest_time is the time in seconds to restore to; + mirror_base is an rpath of the mirror directory corresponding + to the one to be restored; + baseinc_tup is the inc tuple (incdir, list of incs) to be + restored; + and target_base in the dsrp of the target directory. 
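+
+        For example (hypothetical rpaths), restoring a directory "foo"
+        might pass baseinc_tup = IndexedTuple(("foo",),
+        (foo_inc_dir_rp, [foo_diff_rp, foo_snapshot_rp])).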
+ + """ + assert isinstance(target_base, DSRPath) + collated = RORPIter.CollateIterators( + DestructiveStepping.Iterate_from(mirror_base, None), + Restore.yield_inc_tuples(baseinc_tup)) + mirror_finalizer = DestructiveStepping.Finalizer() + target_finalizer = DestructiveStepping.Finalizer() + + for mirror, inc_tup in collated: + if not inc_tup: + inclist = [] + target = target_base.new_index(mirror.index) + else: + inclist = inc_tup[1] + target = target_base.new_index(inc_tup.index) + DestructiveStepping.initialize(target, None) + Restore.RestoreFile(rest_time, mirror, inclist, target) + target_finalizer(target) + if mirror: mirror_finalizer(mirror) + target_finalizer.getresult() + mirror_finalizer.getresult() + + def yield_inc_tuples(inc_tuple): + """Iterate increment tuples starting with inc_tuple + + An increment tuple is an IndexedTuple (pair). The first will + be the rpath of a directory, and the second is a list of all + the increments associated with that directory. If there are + increments that do not correspond to a directory, the first + element will be None. All the rpaths involved correspond to + files in the increment directory. + + """ + oldindex, rpath = inc_tuple.index, inc_tuple[0] + yield inc_tuple + if not rpath or not rpath.isdir(): return + + inc_list_dict = {} # Index tuple lists by index + dirlist = rpath.listdir() + + def affirm_dict_indexed(index): + """Make sure the inc_list_dict has given index""" + if not inc_list_dict.has_key(index): + inc_list_dict[index] = [None, []] + + def add_to_dict(filename): + """Add filename to the inc tuple dictionary""" + rp = rpath.append(filename) + if rp.isincfile(): + basename = rp.getincbase_str() + affirm_dict_indexed(basename) + inc_list_dict[basename][1].append(rp) + elif rp.isdir(): + affirm_dict_indexed(filename) + inc_list_dict[filename][0] = rp + + def list2tuple(index): + """Return inc_tuple version of dictionary entry by index""" + inclist = inc_list_dict[index] + if not inclist[1]: return None # no increments, so ignore + return IndexedTuple(oldindex + (index,), inclist) + + for filename in dirlist: add_to_dict(filename) + keys = inc_list_dict.keys() + keys.sort() + for index in keys: + new_inc_tuple = list2tuple(index) + if not new_inc_tuple: continue + elif new_inc_tuple[0]: # corresponds to directory + for i in Restore.yield_inc_tuples(new_inc_tuple): yield i + else: yield new_inc_tuple + +MakeStatic(Restore) diff --git a/rdiff-backup/src/rlist.py b/rdiff-backup/src/rlist.py new file mode 100644 index 0000000..c0f8ee9 --- /dev/null +++ b/rdiff-backup/src/rlist.py @@ -0,0 +1,240 @@ +from __future__ import generators +import marshal, sha, types +execfile("iterfile.py") + +####################################################################### +# +# rlist - Define the CachingIter, and sig/diff/patch ops on iterators +# + +class CachingIter: + """Cache parts of an iter using a list + + Turn an iter into something that you can prepend elements into, + and also read from without apparently changing the state. + + """ + def __init__(self, iter_or_list): + if type(iter_or_list) is types.ListType: + self.iter = iter(iter_or_list) + else: self.iter = iter_or_list + self.next = self.iter.next + self.head = [] + + def __iter__(self): return self + + def _next(self): + """Take elements from the head list + + When there are elements waiting before the main iterator, this + is the next function. If not, iter.next returns to being next. 
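+
+        Once the head list empties, self.next is pointed back at the
+        underlying iterator's next, so later calls bypass this method
+        entirely.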
+
+        """
+        head = self.head
+        a = head[0]
+        del head[0]
+        if not head: self.next = self.iter.next
+        return a
+
+    def nextrange(self, m):
+        """Return next m elements in list"""
+        l = self.head[:m]
+        del self.head[:m]
+        # Revert to the underlying iterator if the head is drained
+        if not self.head: self.next = self.iter.next
+        for i in xrange(m - len(l)): l.append(self.iter.next())
+        return l
+
+    def peek(self):
+        """Return next element without removing it from iterator"""
+        n = self.next()
+        self.push(n)
+        return n
+
+    def push(self, elem):
+        """Insert an element into the iterator at the beginning"""
+        if not self.head: self.next = self._next
+        self.head.insert(0, elem)
+
+    def pushrange(self, elem_list):
+        """Insert list of multiple elements at the beginning"""
+        if not self.head: self.next = self._next
+        self.head[:0] = elem_list
+
+    def cache(self, m):
+        """Move next m elements from iter to internal list
+
+        If m is None, append the entire rest of the iterator.
+
+        """
+        h, it = self.head, self.iter
+        if m is None:
+            for i in it: h.append(i)
+        else:
+            for i in xrange(m): h.append(it.next())
+        # Make sure the cached elements are read before the iterator,
+        # so cached reads stay non-destructive
+        if h: self.next = self._next
+
+    def __getitem__(self, key):
+        """Support a[i:j] style notation.  Non-destructive"""
+        if type(key) is types.SliceType:
+            if key.stop > len(self.head): self.cache(key.stop - len(self.head))
+            return self.head[key.start:key.stop]
+        else:
+            if key >= len(self.head): self.cache(key + 1 - len(self.head))
+            return self.head[key]
+
+
+
+class RListDelta:
+    """Note a difference from one iterator (A) to another (B)
+
+    The min, max pairs are indices which stand for the half-open
+    interval (min, max], and elemlist is a list of all the elements in
+    A which fall within this interval.
+
+    These are produced by the function RList.Deltas(...)
+
+    """
+    def __init__(self, (min, max), elemlist):
+        self.min, self.max = min, max
+        self.elemlist = elemlist
+
+
+
+class RList:
+    """Tools for signatures, diffing, and patching an iterator
+
+    This class requires that the iterators involved yield objects
+    that have .index and .data attributes.  Two objects with the
+    same .data attribute are supposed to be equivalent.  The
+    iterator must also yield the objects in increasing order with
+    respect to the .index attribute.
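+
+    Schematically, a signature/delta/patch round trip looks like this
+    (basis_iter, basis_iter2, and new_iter are hypothetical iterators
+    of such objects):
+
+        sigs = RList.Signatures(basis_iter)      # e.g. taken remotely
+        deltas = RList.Deltas(sigs, new_iter)    # computed locally
+        restored = RList.Patch(basis_iter2, deltas) # yields new_iter's elements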
+ + """ + blocksize = 100 + + def Signatures(iter): + """Return iterator of signatures from stream of pairs + + Each signature is an ordered pair (last index sig applies to, + SHA digest of data) + + """ + i, s = 0, sha.new() + for iter_elem in iter: + s.update(marshal.dumps(iter_elem.data)) + i = i+1 + if i == RList.blocksize: + yield (iter_elem.index, s.digest()) + i, s = 0, sha.new() + if i != 0: yield (iter_elem.index, s.digest()) + + def sig_one_block(iter_or_list): + """Return the digest portion of a signature on given list""" + s = sha.new() + for iter_elem in iter_or_list: s.update(marshal.dumps(iter_elem.data)) + return s.digest() + + def Deltas(remote_sigs, iter): + """Return iterator of Delta objects that bring iter to remote""" + def get_before(index, iter): + """Return elements in iter whose index is before or equal index + iter needs to be pushable + """ + l = [] + while 1: + try: iter_elem = iter.next() + except StopIteration: return l + if iter_elem.index > index: break + l.append(iter_elem) + iter.push(iter_elem) + return l + + if not isinstance(iter, CachingIter): iter = CachingIter(iter) + oldindex = None + for (rs_index, rs_digest) in remote_sigs: + l = get_before(rs_index, iter) + if rs_digest != RList.sig_one_block(l): + yield RListDelta((oldindex, rs_index), l) + oldindex = rs_index + + def patch_once(basis, delta): + """Apply one delta to basis to return original iterator + + This returns original iterator up to and including the max range + of delta, then stop. basis should be pushable. + + """ + # Return elements of basis until start of delta range + for basis_elem in basis: + if basis_elem.index > delta.min: + basis.push(basis_elem) + break + yield basis_elem + + # Yield elements of delta... + for elem in delta.elemlist: yield elem + + # Finally, discard basis until end of delta range + for basis_elem in basis: + if basis_elem.index > delta.max: + basis.push(basis_elem) + break + + def Patch(basis, deltas): + """Apply a delta stream to basis iterator, yielding original""" + if not isinstance(basis, CachingIter): basis = CachingIter(basis) + for d in deltas: + for elem in RList.patch_once(basis, d): yield elem + for elem in basis: yield elem + + def get_difference_once(basis, delta): + """From one delta, find differences from basis + + Will return pairs (basis_elem, new_elem) where basis_elem is + the element from the basis iterator and new_elem is the + element from the other iterator. If either is missing None + will take its place. If both are present iff two have the + same index. 
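+        (That is, a yielded pair contains both elements only when the
+        two iterators hold an element with the same index but
+        differing data.)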
+
+        """
+        # Discard any elements of basis before delta starts
+        for basis_elem in basis:
+            if basis_elem.index > delta.min:
+                basis.push(basis_elem)
+                break
+
+        # In range, compare each one by one
+        di, boverflow, doverflow = 0, None, None
+        while 1:
+            # Set indices and data, or mark if at end of range already
+            try:
+                basis_elem = basis.next()
+                if basis_elem.index > delta.max:
+                    basis.push(basis_elem)
+                    boverflow = 1
+            except StopIteration: boverflow = 1
+            if di >= len(delta.elemlist): doverflow = 1
+            else: delta_elem = delta.elemlist[di]
+
+            if boverflow and doverflow: break
+            elif boverflow:
+                yield (None, delta_elem)
+                di = di+1
+            elif doverflow: yield (basis_elem, None)
+
+            # Now can assume that everything is in range
+            elif basis_elem.index > delta_elem.index:
+                yield (None, delta_elem)
+                basis.push(basis_elem)
+                di = di+1
+            elif basis_elem.index == delta_elem.index:
+                if basis_elem.data != delta_elem.data:
+                    yield (basis_elem, delta_elem)
+                di = di+1
+            else: yield (basis_elem, None)
+
+    def Dissimilar(basis, deltas):
+        """Return iter of differences from delta iter and basis iter"""
+        if not isinstance(basis, CachingIter): basis = CachingIter(basis)
+        for d in deltas:
+            for triple in RList.get_difference_once(basis, d): yield triple
+
+MakeStatic(RList)
diff --git a/rdiff-backup/src/robust.py b/rdiff-backup/src/robust.py
new file mode 100644
index 0000000..c23ff6a
--- /dev/null
+++ b/rdiff-backup/src/robust.py
@@ -0,0 +1,537 @@
+import tempfile
+execfile("rpath.py")
+
+#######################################################################
+#
+# robust - code which prevents mirror from being corrupted, error-recovery
+#
+# Ideally, no matter how an instance of rdiff-backup gets aborted, no
+# information should get lost.  The target directory should be left in
+# a coherent state, and later instances of rdiff-backup should clean
+# things up so there is no sign that anything ever got aborted or
+# failed.
+#
+# Thus, files should be updated in as atomic a way as possible.  Each
+# file should be updated (and the corresponding diff files written) or
+# not, and it should be clear which happened.  In general, I don't
+# think this is possible, since the creation of the diff files and the
+# changing of updated files cannot be guaranteed to happen together.
+# It is possible, I think, to record various information to files
+# which would allow a later process to figure out what the last
+# operation was, but this would add several file operations to the
+# processing of each file, and I don't think it would be a good
+# tradeoff.
+#
+# The compromise reached here is that diff files should be created
+# just before the mirror files are updated, and each file update
+# should be done with a rename operation on a file in the same
+# directory.  Furthermore, every once in a while, rdiff-backup will
+# record which file it just finished processing.  If any fatal errors
+# are caught, it will also record the last processed file.  Future
+# instances may not know exactly when the previous instance was
+# aborted, but they will be able to narrow down the possibilities.
+
+class RobustAction:
+    """Represents a file operation to be accomplished later"""
+    def __init__(self, init_thunk, final_thunk, error_thunk):
+        """RobustAction initializer
+
+        All the thunks are functions whose return value will be
+        ignored.  init_thunk should not make any irreversible changes
+        but prepare for the writing of the important data.  final_thunk
+        should be as short as possible and do the real work.
+ error_thunk is run if there is an error in init_thunk or + final_thunk. Errors in init_thunk should be corrected by + error_thunk as if nothing had been run in the first place. + The functions take no arguments except for error_thunk, which + receives the exception as its only argument. + + """ + self.init_thunk = init_thunk + self.final_thunk = final_thunk + self.error_thunk = error_thunk + + def execute(self): + """Actually run the operation""" + try: + self.init_thunk() + self.final_thunk() + except Exception, exp: # Catch all errors + Log.exception() + self.error_thunk(exp) + raise exp + + +class Robust: + """Contains various file operations made safer using tempfiles""" + null_action = RobustAction(lambda: None, lambda: None, lambda e: None) + def chain(robust_action_list): + """Return chain tying together a number of robust actions + + The whole chain will be aborted if some error occurs in + initialization stage of any of the component actions. + + """ + ras_with_completed_inits = [] + def init(): + for ra in robust_action_list: + ras_with_completed_inits.append(ra) + ra.init_thunk() + def final(): + for ra in robust_action_list: ra.final_thunk() + def error(exp): + for ra in ras_with_completed_inits: ra.error_thunk(exp) + return RobustAction(init, final, error) + + def chain_nested(robust_action_list): + """Like chain but final actions performed in reverse order""" + ras_with_completed_inits = [] + def init(): + for ra in robust_action_list: + ras_with_completed_inits.append(ra) + ra.init_thunk() + def final(): + ralist_copy = robust_action_list[:] + ralist_copy.reverse() + for ra in ralist_copy: ra.final_thunk() + def error(exp): + for ra in ras_with_completed_inits: ra.error_thunk(exp) + return RobustAction(init, final, error) + + def make_tf_robustaction(init_thunk, tempfiles, final_renames = None): + """Shortcut RobustAction creator when only tempfiles involved + + Often the robust action will just consist of some initial + stage, renaming tempfiles in the final stage, and deleting + them if there is an error. This function makes it easier to + create RobustActions of that type. + + """ + assert type(tempfiles) is types.TupleType, tempfiles + if final_renames is None: final = lambda: None + else: + assert len(tempfiles) == len(final_renames) + def final(): # rename tempfiles to final positions + for i in range(len(tempfiles)): + final_name = final_renames[i] + if final_name: + if final_name.isdir(): # Cannot rename over directory + final_name.delete() + tempfiles[i].rename(final_name) + def error(exp): + for tf in tempfiles: tf.delete() + return RobustAction(init_thunk, final, error) + + def copy_action(rorpin, rpout): + """Return robust action copying rorpin to rpout + + The source can be a rorp or an rpath. Does not recurse. If + directories copied, then just exit (output directory not + overwritten). 
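+
+        Typical use is simply (source_rorp and dest_rp hypothetical):
+
+            Robust.copy_action(source_rorp, dest_rp).execute()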
+
+        """
+        tfl = [None] # Need mutable object that init and final can access
+        def init():
+            if not (rorpin.isdir() and rpout.isdir()): # already a dir
+                tfl[0] = TempFileManager.new(rpout)
+                if rorpin.isreg(): tfl[0].write_from_fileobj(rorpin.open("rb"))
+                else: RPath.copy(rorpin, tfl[0])
+        def final():
+            if tfl[0] and tfl[0].lstat():
+                if rpout.isdir(): rpout.delete()
+                tfl[0].rename(rpout)
+        return RobustAction(init, final, lambda e: tfl[0] and tfl[0].delete())
+
+    def copy_with_attribs_action(rorpin, rpout):
+        """Like copy_action but also copy attributes"""
+        tfl = [None] # Need mutable object that init and final can access
+        def init():
+            if not (rorpin.isdir() and rpout.isdir()): # already a dir
+                tfl[0] = TempFileManager.new(rpout)
+                if rorpin.isreg(): tfl[0].write_from_fileobj(rorpin.open("rb"))
+                else: RPath.copy(rorpin, tfl[0])
+                if tfl[0].lstat(): # Some files, like sockets, won't be created
+                    RPathStatic.copy_attribs(rorpin, tfl[0])
+        def final():
+            if rorpin.isdir() and rpout.isdir():
+                RPath.copy_attribs(rorpin, rpout)
+            elif tfl[0] and tfl[0].lstat():
+                if rpout.isdir(): rpout.delete()
+                tfl[0].rename(rpout)
+        return RobustAction(init, final, lambda e: tfl[0] and tfl[0].delete())
+
+    def copy_attribs_action(rorpin, rpout):
+        """Return action which just copies attributes
+
+        Copying attributes is already pretty atomic, so just run
+        normal sequence.
+
+        """
+        def final(): RPath.copy_attribs(rorpin, rpout)
+        return RobustAction(lambda: None, final, lambda e: None)
+
+    def symlink_action(rpath, linktext):
+        """Return symlink action by moving one file over another"""
+        tf = TempFileManager.new(rpath)
+        def init(): tf.symlink(linktext)
+        return Robust.make_tf_robustaction(init, (tf,), (rpath,))
+
+    def destructive_write_action(rp, s):
+        """Return action writing string s to rpath rp in robust way
+
+        This will overwrite any data currently in rp.
+
+        """
+        tf = TempFileManager.new(rp)
+        def init():
+            fp = tf.open("wb")
+            fp.write(s)
+            assert not fp.close()
+            tf.setdata()
+        return Robust.make_tf_robustaction(init, (tf,), (rp,))
+
+MakeStatic(Robust)
+
+
+class TempFileManager:
+    """Manage temp files"""
+
+    # This is a connection-specific list of temp files, to be cleaned
+    # up before rdiff-backup exits.
+    _tempfiles = []
+
+    # To make collisions less likely, this gets put in the file name
+    # and incremented whenever a new file is requested.
+    _tfindex = 0
+
+    def new(cls, rp_base, same_dir = 1):
+        """Return new tempfile that isn't in use.
+
+        If same_dir, tempfile will be in same directory as rp_base.
+        Otherwise, use tempfile module to get filename.
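+
+        A common pattern (rp and fp hypothetical):
+
+            tf = TempFileManager.new(rp)  # tempfile beside rp
+            tf.write_from_fileobj(fp)     # write the new contents
+            tf.rename(rp)                 # atomically replace rp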
+ + """ + conn = rp_base.conn + if conn is not Globals.local_connection: + return conn.TempFileManager.new(rp_base, same_dir) + + def find_unused(conn, dir): + """Find an unused tempfile with connection conn in directory dir""" + while 1: + if cls._tfindex > 100000000: + Log("Resetting index", 2) + cls._tfindex = 0 + tf = TempFile(conn, os.path.join(dir, + "rdiff-backup.tmp.%d" % cls._tfindex)) + cls._tfindex = cls._tfindex+1 + if not tf.lstat(): return tf + + if same_dir: tf = find_unused(conn, rp_base.dirsplit()[0]) + else: tf = TempFile(conn, tempfile.mktemp()) + cls._tempfiles.append(tf) + return tf + + def remove_listing(cls, tempfile): + """Remove listing of tempfile""" + if Globals.local_connection is not tempfile.conn: + tempfile.conn.TempFileManager.remove_listing(tempfile) + elif tempfile in cls._tempfiles: cls._tempfiles.remove(tempfile) + + def delete_all(cls): + """Delete all remaining tempfiles""" + for tf in cls._tempfiles[:]: tf.delete() + +MakeClass(TempFileManager) + + +class TempFile(RPath): + """Like an RPath, but keep track of which ones are still here""" + def rename(self, rp_dest): + """Rename temp file to permanent location, possibly overwriting""" + if self.isdir() and not rp_dest.isdir(): + # Cannot move a directory directly over another file + rp_dest.delete() + if (isinstance(rp_dest, DSRPath) and rp_dest.perms_delayed + and not self.hasfullperms()): + # If we are moving to a delayed perm directory, delay + # permission change on destination. + rp_dest.chmod(self.getperms()) + self.chmod(0700) + RPathStatic.rename(self, rp_dest) + TempFileManager.remove_listing(self) + + def delete(self): + RPath.delete(self) + TempFileManager.remove_listing(self) + + +class SaveState: + """Save state in the middle of backups for resuming later""" + _last_file_sym = None # RPath of sym pointing to last file processed + _last_file_definitive_rp = None # Touch this if last file is really last + _last_checkpoint_time = 0 # time in seconds of last checkpoint + _checkpoint_rp = None # RPath of checkpoint data pickle + + def init_filenames(cls, incrementing): + """Set rpaths of markers. Assume rbdir already set. + + If incrementing, then indicate increment operation, otherwise + indicate mirror. + + """ + if not Globals.isbackup_writer: + return Globals.backup_writer.SaveState.init_filenames(incrementing) + + assert Globals.local_connection is Globals.rbdir.conn, \ + Globals.rbdir.conn + if incrementing: cls._last_file_sym = Globals.rbdir.append( + "last-file-incremented.%s.snapshot" % Time.curtimestr) + else: cls._last_file_sym = Globals.rbdir.append( + "last-file-mirrored.%s.snapshot" % Time.curtimestr) + cls._checkpoint_rp = Globals.rbdir.append( + "checkpoint-data.%s.snapshot" % Time.curtimestr) + cls._last_file_definitive_rp = Globals.rbdir.append( + "last-file-definitive.%s.snapshot" % Time.curtimestr) + + def touch_last_file(cls): + """Touch last file marker, indicating backup has begun""" + cls._last_file_sym.touch() + + def touch_last_file_definitive(cls): + """Create last-file-definitive marker + + When a backup gets aborted, there may be time to indicate the + last file successfully processed, and this should be touched. + Sometimes when the abort is hard, there may be a last file + indicated, but further files since then have been processed, + in which case this shouldn't be touched. 
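+
+        The Resume class below uses this marker: group_rps_by_time
+        records its presence, and quad_to_si turns it into the
+        session's last_definitive flag.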
+ + """ + cls._last_file_definitive_rp.touch() + + def record_last_file_action(cls, last_file_rorp): + """Action recording last file to be processed as symlink in rbdir + + last_file_rorp is None means that no file is known to have + been processed. + + """ + if last_file_rorp: + symtext = apply(os.path.join, + ('increments',) + last_file_rorp.index) + return Robust.symlink_action(cls._last_file_sym, symtext) + else: return RobustAction(lambda: None, cls.touch_last_file, + lambda exp: None) + + def checkpoint_inc_backup(cls, ITR, finalizer, last_file_rorp, + override = None): + """Save states of tree reducer and finalizer during inc backup + + If override is true, checkpoint even if one isn't due. + + """ + if not override and not cls.checkpoint_needed(): return + assert cls._checkpoint_rp, "_checkpoint_rp not set yet" + + cls._last_checkpoint_time = time.time() + Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7) + state_string = cPickle.dumps((ITR.getstate(), finalizer.getstate())) + Robust.chain([Robust.destructive_write_action(cls._checkpoint_rp, + state_string), + cls.record_last_file_action(last_file_rorp)]).execute() + + def checkpoint_mirror(cls, finalizer, last_file_rorp, override = None): + """For a mirror, only finalizer and last_file should be saved""" + if not override and not cls.checkpoint_needed(): return + if not cls._checkpoint_rp: + Log("Warning, _checkpoint_rp not set yet", 2) + return + + cls._last_checkpoint_time = time.time() + Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7) + state_string = cPickle.dumps(finalizer.getstate()) + Robust.chain([Robust.destructive_write_action(cls._checkpoint_rp, + state_string), + cls.record_last_file_action(last_file_rorp)]).execute() + + def checkpoint_needed(cls): + """Returns true if another checkpoint is called for""" + return (time.time() > cls._last_checkpoint_time + + Globals.checkpoint_interval) + + def checkpoint_remove(cls): + """Remove all checkpointing data after successful operation""" + for rp in Resume.get_relevant_rps(): rp.delete() + +MakeClass(SaveState) + + +class Resume: + """Check for old aborted backups and resume if necessary""" + _session_info_list = None # List of ResumeSessionInfo's, sorted by time + def FindTime(cls, index, later_than = 0): + """For a given index, find the appropriate time to use for inc + + If it is clear which time to use (because it is determined by + definitive records, or there are no aborted backup, etc.) then + just return the appropriate time. Otherwise, if an aborted + backup was last checkpointed before the index, assume that it + didn't get there, and go for the older time. If an inc file + is already present, the function will be rerun with later time + specified. + + """ + if Time.prevtime > later_than: return Time.prevtime # usual case + + for si in cls.get_sis_covering_index(index): + if si.time > later_than: return si.time + raise SkipFileException("Index %s already covered, skipping" % + str(index)) + + def get_sis_covering_index(cls, index): + """Return sorted list of SessionInfos which may cover index + + Aborted backup may be relevant unless index is lower and we + are sure that it didn't go further. 
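+
+        In other words, a session is excluded only when it is known
+        definitively to have stopped strictly before the given index.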
+
+        """
+        return filter(lambda session_info:
+                      not ((session_info.last_index is None or
+                            session_info.last_index < index) and
+                           session_info.last_definitive),
+                      cls._session_info_list)
+
+    def SetSessionInfo(cls):
+        """Read data directory and initialize _session_info"""
+        silist = []
+        rp_quad_dict = cls.group_rps_by_time(cls.get_relevant_rps())
+        times = rp_quad_dict.keys()
+        times.sort()
+        for time in times:
+            silist.append(cls.quad_to_si(time, rp_quad_dict[time]))
+        cls._session_info_list = silist
+
+    def get_relevant_rps(cls):
+        """Return list of relevant rpaths in rbdata directory"""
+        relevant_bases = ['last-file-incremented', 'last-file-mirrored',
+                          'checkpoint-data', 'last-file-definitive']
+        rps = map(Globals.rbdir.append, Globals.rbdir.listdir())
+        return filter(lambda rp: rp.isincfile()
+                      and rp.getincbase_str() in relevant_bases, rps)
+
+    def group_rps_by_time(cls, rplist):
+        """Take a list of rps and return a time dict {time: quadlist}
+
+        Times in seconds are the keys; values are quadruples of rps
+        [last-file-incremented, last-file-mirrored, checkpoint-data,
+        last-is-definitive].
+
+        """
+        result = {}
+        for rp in rplist:
+            time = Time.stringtotime(rp.getinctime())
+            if result.has_key(time): quadlist = result[time]
+            else: quadlist = [None, None, None, None]
+            base_string = rp.getincbase_str()
+            if base_string == 'last-file-incremented': quadlist[0] = rp
+            elif base_string == 'last-file-mirrored': quadlist[1] = rp
+            elif base_string == 'last-file-definitive': quadlist[3] = 1
+            else:
+                assert base_string == 'checkpoint-data'
+                quadlist[2] = rp
+            result[time] = quadlist
+        return result
+
+    def quad_to_si(cls, time, quad):
+        """Take time, quadlist, return associated ResumeSessionInfo"""
+        increment_sym, mirror_sym, checkpoint_rp, last_definitive = quad
+        assert not (increment_sym and mirror_sym) # both shouldn't exist
+        ITR, finalizer = None, None
+        if increment_sym:
+            mirror = None
+            last_index = cls.sym_to_index(increment_sym)
+            if checkpoint_rp:
+                ITR, finalizer = cls.unpickle_checkpoint(checkpoint_rp)
+        elif mirror_sym:
+            mirror = 1
+            last_index = cls.sym_to_index(mirror_sym)
+            if checkpoint_rp:
+                finalizer = cls.unpickle_checkpoint(checkpoint_rp)
+        return ResumeSessionInfo(mirror, time, last_index, last_definitive,
+                                 finalizer, ITR)
+
+    def sym_to_index(cls, sym_rp):
+        """Read last file sym rp, return last file index
+
+        If sym_rp is not a sym at all, return None, indicating that no
+        file index was ever conclusively processed.
+
+        """
+        if not sym_rp.issym(): return None
+        link_components = sym_rp.readlink().split("/")
+        assert link_components[0] == 'increments'
+        return tuple(link_components[1:])
+
+    def unpickle_checkpoint(cls, checkpoint_rp):
+        """Read data from checkpoint_rp and return unpickled data
+
+        The return value is the finalizer state for a mirror
+        checkpoint, and the pair (patch increment ITR, finalizer
+        state) for an increment checkpoint.
+
+        """
+        fp = checkpoint_rp.open("rb")
+        data = fp.read()
+        fp.close()
+        return cPickle.loads(data)
+
+    def ResumeCheck(cls):
+        """Return relevant ResumeSessionInfo if there's one we should resume
+
+        Also, if we find an RSI to resume, reset the current time to
+        the old resume time.
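+
+        Globals.resume may be 1 (always resume the last session), 0
+        (never resume), or unset, in which case we resume only when
+        the aborted session is less than Globals.resume_window
+        seconds old.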
+
+        """
+        cls.SetSessionInfo()
+        if not cls._session_info_list:
+            if Globals.resume == 1:
+                Log.FatalError("User specified resume, but no data on "
+                               "previous backup found.")
+            else: return None
+        else:
+            si = cls._session_info_list[-1]
+            if (Globals.resume == 1 or
+                (time.time() <= (si.time + Globals.resume_window) and
+                 not Globals.resume == 0)):
+                Log("Resuming aborted backup dated %s" %
+                    Time.timetopretty(si.time), 2)
+                Time.setcurtime(si.time)
+                return si
+            else:
+                Log("Last backup dated %s was aborted, but we aren't "
+                    "resuming it." % Time.timetopretty(si.time), 2)
+                return None
+        assert 0
+
+MakeClass(Resume)
+
+
+class ResumeSessionInfo:
+    """Hold information about a previously aborted session"""
+    def __init__(self, mirror, time, last_index,
+                 last_definitive, finalizer_state = None, ITR_state = None):
+        """Class initializer
+
+        time - starting time in seconds of backup
+        mirror - true if backup was a mirror, false if increment
+        last_index - last confirmed index processed by backup, or None
+        last_definitive - true if we know last_index is really last
+        finalizer_state - finalizer reducer state if available
+        ITR_state - for an increment, the ITR reducer state (assume
+        mirror if absent)
+
+        """
+        self.time = time
+        self.mirror = mirror
+        self.last_index = last_index
+        self.last_definitive = last_definitive
+        self.ITR_state, self.finalizer_state = ITR_state, finalizer_state
diff --git a/rdiff-backup/src/rorpiter.py b/rdiff-backup/src/rorpiter.py
new file mode 100644
index 0000000..5740ef8
--- /dev/null
+++ b/rdiff-backup/src/rorpiter.py
@@ -0,0 +1,248 @@
+from __future__ import generators
+execfile("robust.py")
+import tempfile
+
+#######################################################################
+#
+# rorpiter - Operations on Iterators of Read Only Remote Paths
+#
+
+class RORPIterException(Exception): pass
+
+class RORPIter:
+    """Functions relating to iterators of Read Only RPaths
+
+    The main structure will be an iterator that yields RORPaths.
+    Every RORPath has a "raw" form that makes it more amenable to
+    being turned into a file.  The raw form of the iterator yields
+    each RORPath in the form of the tuple (index, data_dictionary,
+    files), where files is the number of files attached (usually 1 or
+    0).  After that, if a file is attached, it yields that file.
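+
+    Schematically, a regular file might appear in raw form as
+
+        (("usr", "bin", "ls"), {'type': 'reg', ...}, 1)
+        <file object containing the file's data>
+
+    while a file with nothing attached is a single (index, data, 0)
+    tuple.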
+ + """ + def ToRaw(rorp_iter): + """Convert a rorp iterator to raw form""" + for rorp in rorp_iter: + if rorp.file: + yield (rorp.index, rorp.data, 1) + yield rorp.file + else: yield (rorp.index, rorp.data, 0) + + def FromRaw(raw_iter): + """Convert raw rorp iter back to standard form""" + for index, data, num_files in raw_iter: + rorp = RORPath(index, data) + if num_files: + assert num_files == 1, "Only one file accepted right now" + rorp.setfile(RORPIter.getnext(raw_iter)) + yield rorp + + def ToFile(rorp_iter): + """Return file version of iterator""" + return FileWrappingIter(RORPIter.ToRaw(rorp_iter)) + + def FromFile(fileobj): + """Recover rorp iterator from file interface""" + return RORPIter.FromRaw(IterWrappingFile(fileobj)) + + def IterateRPaths(base_rp): + """Return an iterator yielding RPaths with given base rp""" + yield base_rp + if base_rp.isdir(): + dirlisting = base_rp.listdir() + dirlisting.sort() + for filename in dirlisting: + for rp in RORPIter.IterateRPaths(base_rp.append(filename)): + yield rp + + def Signatures(rp_iter): + """Yield signatures of rpaths in given rp_iter""" + for rp in rp_iter: + if rp.isplaceholder(): yield rp + else: + rorp = rp.getRORPath() + if rp.isreg(): rorp.setfile(Rdiff.get_signature(rp)) + yield rorp + + def GetSignatureIter(base_rp): + """Return a signature iterator recurring over the base_rp""" + return RORPIter.Signatures(RORPIter.IterateRPaths(base_rp)) + + def CollateIterators(*rorp_iters): + """Collate RORPath iterators by index + + So it takes two or more iterators of rorps and returns an + iterator yielding tuples like (rorp1, rorp2) with the same + index. If one or the other lacks that index, it will be None + + """ + # overflow[i] means that iter[i] has been exhausted + # rorps[i] is None means that it is time to replenish it. + iter_num = len(rorp_iters) + if iter_num == 2: + return RORPIter.Collate2Iters(rorp_iters[0], rorp_iters[1]) + overflow = [None] * iter_num + rorps = overflow[:] + + def setrorps(overflow, rorps): + """Set the overflow and rorps list""" + for i in range(iter_num): + if not overflow[i] and rorps[i] is None: + try: rorps[i] = rorp_iters[i].next() + except StopIteration: + overflow[i] = 1 + rorps[i] = None + + def getleastindex(rorps): + """Return the first index in rorps, assuming rorps isn't empty""" + return min(map(lambda rorp: rorp.index, + filter(lambda x: x, rorps))) + + def yield_tuples(iter_num, overflow, rorps): + while 1: + setrorps(overflow, rorps) + if not None in overflow: break + + index = getleastindex(rorps) + yieldval = [] + for i in range(iter_num): + if rorps[i] and rorps[i].index == index: + yieldval.append(rorps[i]) + rorps[i] = None + else: yieldval.append(None) + yield IndexedTuple(index, yieldval) + return yield_tuples(iter_num, overflow, rorps) + + def Collate2Iters(riter1, riter2): + """Special case of CollateIterators with 2 arguments + + This does the same thing but is faster because it doesn't have + to consider the >2 iterator case. Profiler says speed is + important here. 
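+
+        For example, collating iterators with indices ("a",), ("c",)
+        and ("b",), ("c",) yields IndexedTuples pairing ("a",) with
+        None, None with ("b",), and the two ("c",) elements together.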
+ + """ + relem1, relem2 = None, None + while 1: + if not relem1: + try: relem1 = riter1.next() + except StopIteration: + if relem2: yield IndexedTuple(index2, (None, relem2)) + for relem2 in riter2: + yield IndexedTuple(relem2.index, (None, relem2)) + break + index1 = relem1.index + if not relem2: + try: relem2 = riter2.next() + except StopIteration: + if relem1: yield IndexedTuple(index1, (relem1, None)) + for relem1 in riter1: + yield IndexedTuple(relem1.index, (relem1, None)) + break + index2 = relem2.index + + if index1 < index2: + yield IndexedTuple(index1, (relem1, None)) + relem1 = None + elif index1 == index2: + yield IndexedTuple(index1, (relem1, relem2)) + relem1, relem2 = None, None + else: # index2 is less + yield IndexedTuple(index2, (None, relem2)) + relem2 = None + + def getnext(iter): + """Return the next element of an iterator, raising error if none""" + try: next = iter.next() + except StopIteration: raise RORPIterException("Unexpected end to iter") + return next + + def GetDiffIter(sig_iter, new_iter): + """Return delta iterator from sig_iter to new_iter + + The accompanying file for each will be a delta as produced by + rdiff, unless the destination file does not exist, in which + case it will be the file in its entirety. + + sig_iter may be composed of rorps, but new_iter should have + full RPaths. + + """ + collated_iter = RORPIter.CollateIterators(sig_iter, new_iter) + for rorp, rp in collated_iter: yield RORPIter.diffonce(rorp, rp) + + def diffonce(sig_rorp, new_rp): + """Return one diff rorp, based from signature rorp and orig rp""" + if sig_rorp and sig_rorp.isreg() and new_rp and new_rp.isreg(): + diff_rorp = new_rp.getRORPath() + diff_rorp.setfile(Rdiff.get_delta_sigfileobj(sig_rorp.open("rb"), + new_rp)) + diff_rorp.set_attached_filetype('diff') + return diff_rorp + else: + # Just send over originial if diff isn't appropriate + if sig_rorp: sig_rorp.close_if_necessary() + if not new_rp: return RORPath(sig_rorp.index) + elif new_rp.isreg(): + diff_rorp = new_rp.getRORPath(1) + diff_rorp.set_attached_filetype('snapshot') + return diff_rorp + else: return new_rp.getRORPath() + + def PatchIter(base_rp, diff_iter): + """Patch the appropriate rps in basis_iter using diff_iter""" + basis_iter = RORPIter.IterateRPaths(base_rp) + collated_iter = RORPIter.CollateIterators(basis_iter, diff_iter) + for basisrp, diff_rorp in collated_iter: + RORPIter.patchonce_action(base_rp, basisrp, diff_rorp).execute() + + def patchonce_action(base_rp, basisrp, diff_rorp): + """Return action patching basisrp using diff_rorp""" + assert diff_rorp, "Missing diff index %s" % basisrp.index + if not diff_rorp.lstat(): + return RobustAction(lambda: None, basisrp.delete, lambda e: None) + + if basisrp and basisrp.isreg() and diff_rorp.isreg(): + assert diff_rorp.get_attached_filetype() == 'diff' + return Rdiff.patch_with_attribs_action(basisrp, diff_rorp) + else: # Diff contains whole file, just copy it over + if not basisrp: basisrp = base_rp.new_index(diff_rorp.index) + return Robust.copy_with_attribs_action(diff_rorp, basisrp) + +MakeStatic(RORPIter) + + + +class IndexedTuple: + """Like a tuple, but has .index + + This is used by CollateIterator above, and can be passed to the + IterTreeReducer. 
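+
+    E.g. IndexedTuple(("foo",), (rorp1, rorp2)) behaves like the pair
+    (rorp1, rorp2) but orders by its .index.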
+ + """ + def __init__(self, index, sequence): + self.index = index + self.data = tuple(sequence) + + def __len__(self): return len(self.data) + + def __getitem__(self, key): + """This only works for numerical keys (faster that way)""" + return self.data[key] + + def __cmp__(self, other): + assert isinstance(other, IndexedTuple) + if self.index < other.index: return -1 + elif self.index == other.index: return 0 + else: return 1 + + def __eq__(self, other): + if isinstance(other, IndexedTuple): + return self.index == other.index and self.data == other.data + elif type(other) is types.TupleType: + return self.data == other + else: return None + + def __str__(self): + assert len(self.data) == 2 + return "(%s, %s).%s" % (str(self.data[0]), str(self.data[1]), + str(self.index)) diff --git a/rdiff-backup/src/rpath.py b/rdiff-backup/src/rpath.py new file mode 100644 index 0000000..4e6cc8f --- /dev/null +++ b/rdiff-backup/src/rpath.py @@ -0,0 +1,704 @@ +execfile("connection.py") +import os, stat, re, sys, shutil + +####################################################################### +# +# rpath - Wrapper class around a real path like "/usr/bin/env" +# +# The RPath and associated classes make some function calls more +# convenient (e.g. RPath.getperms()) and also make working with files +# on remote systems transparent. +# + +class RPathException(Exception): pass + +class RPathStatic: + """Contains static methods for use with RPaths""" + def copyfileobj(inputfp, outputfp): + """Copies file inputfp to outputfp in blocksize intervals""" + blocksize = Globals.blocksize + while 1: + inbuf = inputfp.read(blocksize) + if not inbuf: break + outputfp.write(inbuf) + + def cmpfileobj(fp1, fp2): + """True if file objects fp1 and fp2 contain same data""" + blocksize = Globals.blocksize + while 1: + buf1 = fp1.read(blocksize) + buf2 = fp2.read(blocksize) + if buf1 != buf2: return None + elif not buf1: return 1 + + def check_for_files(*rps): + """Make sure that all the rps exist, raise error if not""" + for rp in rps: + if not rp.lstat(): + raise RPathException("File %s does not exist" % rp.path) + + def move(rpin, rpout): + """Move rpin to rpout, renaming if possible""" + try: RPath.rename(rpin, rpout) + except os.error: + RPath.copy(rpin, rpout) + rpin.delete() + + def copy(rpin, rpout): + """Copy RPath rpin to rpout. 
Works for symlinks, dirs, etc.""" + Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6) + if not rpin.lstat(): + raise RPathException, ("File %s does not exist" % rpin.index) + + if rpout.lstat(): + if rpin.isreg() or not RPath.cmp(rpin, rpout): + rpout.delete() # easier to write that compare + else: return + + if rpin.isreg(): RPath.copy_reg_file(rpin, rpout) + elif rpin.isdir(): rpout.mkdir() + elif rpin.issym(): rpout.symlink(rpin.readlink()) + elif rpin.ischardev(): + major, minor = rpin.getdevnums() + rpout.makedev("c", major, minor) + elif rpin.isblkdev(): + major, minor = rpin.getdevnums() + rpout.makedev("b", major, minor) + elif rpin.isfifo(): rpout.mkfifo() + elif rpin.issock(): Log("Found socket, ignoring", 1) + else: raise RPathException("File %s has unknown type" % rpin.path) + + def copy_reg_file(rpin, rpout): + """Copy regular file rpin to rpout, possibly avoiding connection""" + try: + if rpout.conn is rpin.conn: + rpout.conn.shutil.copyfile(rpin.path, rpout.path) + rpout.data = {'type': rpin.data['type']} + return + except AttributeError: pass + rpout.write_from_fileobj(rpin.open("rb")) + + def cmp(rpin, rpout): + """True if rpin has the same data as rpout + + cmp does not compare file ownership, permissions, or times, or + examine the contents of a directory. + + """ + RPath.check_for_files(rpin, rpout) + if rpin.isreg(): + if not rpout.isreg(): return None + fp1, fp2 = rpin.open("rb"), rpout.open("rb") + result = RPathStatic.cmpfileobj(fp1, fp2) + if fp1.close() or fp2.close(): + raise RPathException("Error closing file") + return result + elif rpin.isdir(): return rpout.isdir() + elif rpin.issym(): + return rpout.issym() and (rpin.readlink() == rpout.readlink()) + elif rpin.ischardev(): + return rpout.ischardev() and \ + (rpin.getdevnums() == rpout.getdevnums()) + elif rpin.isblkdev(): + return rpout.isblkdev() and \ + (rpin.getdevnums() == rpout.getdevnums()) + elif rpin.isfifo(): return rpout.isfifo() + elif rpin.issock(): return rpout.issock() + else: raise RPathException("File %s has unknown type" % rpin.path) + + def copy_attribs(rpin, rpout): + """Change file attributes of rpout to match rpin + + Only changes the chmoddable bits, uid/gid ownership, and + timestamps, so both must already exist. + + """ + Log("Copying attributes from %s to %s" % (rpin.index, rpout.path), 7) + RPath.check_for_files(rpin, rpout) + if rpin.issym(): return # symlinks have no valid attributes + if Globals.change_ownership: apply(rpout.chown, rpin.getuidgid()) + rpout.chmod(rpin.getperms()) + if not rpin.isdev(): rpout.setmtime(rpin.getmtime()) + + def cmp_attribs(rp1, rp2): + """True if rp1 has the same file attributes as rp2 + + Does not compare file access times. If not changing + ownership, do not check user/group id. 
+ + """ + RPath.check_for_files(rp1, rp2) + if Globals.change_ownership and rp1.getuidgid() != rp2.getuidgid(): + result = None + elif rp1.getperms() != rp2.getperms(): result = None + elif rp1.issym() and rp2.issym(): # Don't check times for some types + result = 1 + elif rp1.isblkdev() and rp2.isblkdev(): result = 1 + elif rp1.ischardev() and rp2.ischardev(): result = 1 + else: result = (rp1.getmtime() == rp2.getmtime()) + Log("Compare attribs %s and %s: %s" % (rp1.path, rp2.path, result), 7) + return result + + def copy_with_attribs(rpin, rpout): + """Copy file and then copy over attributes""" + RPath.copy(rpin, rpout) + RPath.copy_attribs(rpin, rpout) + + def quick_cmp_with_attribs(rp1, rp2): + """Quicker version of cmp_with_attribs + + Instead of reading all of each file, assume that regular files + are the same if the attributes compare. + + """ + if not RPath.cmp_attribs(rp1, rp2): return None + if rp1.isreg() and rp2.isreg() and (rp1.getlen() == rp2.getlen()): + return 1 + return RPath.cmp(rp1, rp2) + + def cmp_with_attribs(rp1, rp2): + """Combine cmp and cmp_attribs""" + return RPath.cmp_attribs(rp1, rp2) and RPath.cmp(rp1, rp2) + + def rename(rp_source, rp_dest): + """Rename rp_source to rp_dest""" + assert rp_source.conn is rp_dest.conn + Log("Renaming %s to %s" % (rp_source.path, rp_dest.path), 7) + rp_source.conn.os.rename(rp_source.path, rp_dest.path) + rp_dest.data = rp_source.data + rp_source.data = {'type': None} + + def tupled_lstat(filename): + """Like os.lstat, but return only a tuple, or None if os.error + + Later versions of os.lstat return a special lstat object, + which can confuse the pickler and cause errors in remote + operations. + + """ + try: return tuple(os.lstat(filename)) + except os.error: return None + + def cmp_recursive(rp1, rp2): + """True if rp1 and rp2 are at the base of same directories + + Includes only attributes, no file data. This function may not + be used in rdiff-backup but it comes in handy in the unit + tests. + + """ + rp1.setdata() + rp2.setdata() + dsiter1, dsiter2 = map(DestructiveStepping.Iterate_with_Finalizer, + [rp1, rp2], [1, None]) + result = Iter.equal(dsiter1, dsiter2, 1) + for i in dsiter1: pass # make sure all files processed anyway + for i in dsiter2: pass + return result + +MakeStatic(RPathStatic) + + +class RORPath(RPathStatic): + """Read Only RPath - carry information about a path + + These contain information about a file, and possible the file's + data, but do not have a connection and cannot be written to or + changed. The advantage of these objects is that they can be + communicated by encoding their index and data dictionary. 
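+
+    A typical data dictionary for a regular file might look like
+
+        {'type': 'reg', 'perms': 0644, 'uid': 1000, 'gid': 1000,
+         'size': 1024, 'mtime': 1016694163L}
+
+    (see RPath.setdata below for the full set of keys).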
+ + """ + def __init__(self, index, data = None): + self.index = index + if data: self.data = data + else: self.data = {'type':None} # signify empty file + self.file = None + + def __eq__(self, other): + """Signal two files equivalent""" + if not Globals.change_ownership or self.issym() and other.issym(): + # Don't take file ownership into account when comparing + data1, data2 = self.data.copy(), other.data.copy() + for d in (data1, data2): + for key in ('uid', 'gid'): + if d.has_key(key): del d[key] + return self.index == other.index and data1 == data2 + else: return self.index == other.index and self.data == other.data + + def __str__(self): + """Pretty print file statistics""" + return "Index: %s\nData: %s" % (self.index, self.data) + + def __getstate__(self): + """Return picklable state + + This is necessary in case the RORPath is carrying around a + file object, which can't/shouldn't be pickled. + + """ + return (self.index, self.data) + + def __setstate__(self, rorp_state): + """Reproduce RORPath from __getstate__ output""" + self.index, self.data = rorp_state + + def make_placeholder(self): + """Make rorp into a placeholder + + This object doesn't contain any information about the file, + but, when passed along, may show where the previous stages are + in their processing. It is the RORPath equivalent of fiber. + + """ + self.data = {'placeholder': + ("It is actually good for placeholders to use" + "up a bit of memory, so the buffers get flushed" + "more often when placeholders move through." + "See the get_dissimilar docs for more info.")} + + def isplaceholder(self): + """True if the object is a placeholder""" + return self.data.has_key('placeholder') + + def lstat(self): + """Returns type of file + + The allowable types are None if the file doesn't exist, 'reg' + for a regular file, 'dir' for a directory, 'dev' for a device + file, 'fifo' for a fifo, 'sock' for a socket, and 'sym' for a + symlink. 
+ + """ + return self.data['type'] + gettype = lstat + + def isdir(self): + """True if self is a dir""" + return self.data['type'] == 'dir' + + def isreg(self): + """True if self is a regular file""" + return self.data['type'] == 'reg' + + def issym(self): + """True if path is of a symlink""" + return self.data['type'] == 'sym' + + def isfifo(self): + """True if path is a fifo""" + return self.data['type'] == 'fifo' + + def ischardev(self): + """True if path is a character device file""" + return self.data['type'] == 'dev' and self.data['devnums'][0] == 'c' + + def isblkdev(self): + """True if path is a block device file""" + return self.data['type'] == 'dev' and self.data['devnums'][0] == 'b' + + def isdev(self): + """True if path is a device file""" + return self.data['type'] == 'dev' + + def issock(self): + """True if path is a socket""" + return self.data['type'] == 'sock' + + def getperms(self): + """Return permission block of file""" + return self.data['perms'] + + def getsize(self): + """Return length of file in bytes""" + return self.data['size'] + + def getuidgid(self): + """Return userid/groupid of file""" + return self.data['uid'], self.data['gid'] + + def getatime(self): + """Return access time in seconds""" + return self.data['atime'] + + def getmtime(self): + """Return modification time in seconds""" + return self.data['mtime'] + + def readlink(self): + """Wrapper around os.readlink()""" + return self.data['linkname'] + + def getdevnums(self): + """Return a devices major/minor numbers from dictionary""" + return self.data['devnums'][1:] + + def setfile(self, file): + """Right now just set self.file to be the already opened file""" + assert file and not self.file + def closing_hook(): self.file_already_open = None + self.file = RPathFileHook(file, closing_hook) + self.file_already_open = None + + def get_attached_filetype(self): + """If there is a file attached, say what it is + + Currently the choices are 'snapshot' meaning an exact copy of + something, and 'diff' for an rdiff style diff. + + """ + return self.data['filetype'] + + def set_attached_filetype(self, type): + """Set the type of the attached file""" + self.data['filetype'] = type + + def open(self, mode): + """Return file type object if any was given using self.setfile""" + if mode != "rb": raise RPathException("Bad mode %s" % mode) + if self.file_already_open: + raise RPathException("Attempt to open same file twice") + self.file_already_open = 1 + return self.file + + def close_if_necessary(self): + """If file is present, discard data and close""" + if self.file: + while self.file.read(Globals.blocksize): pass + assert not self.file.close(), \ + "Error closing file\ndata = %s\nindex = %s\n" % (self.data, + self.index) + self.file_already_open = None + + +class RPath(RORPath): + """Remote Path class - wrapper around a possibly non-local pathname + + This class contains a dictionary called "data" which should + contain all the information about the file sufficient for + identification (i.e. if two files have the the same (==) data + dictionary, they are the same file). + + """ + regex_chars_to_quote = re.compile("[\\\\\\\"\\$`]") + + def __init__(self, connection, base, index = (), data = None): + """RPath constructor + + connection = self.conn is the Connection the RPath will use to + make system calls, and index is the name of the rpath used for + comparison, and should be a tuple consisting of the parts of + the rpath after the base split up. 
For instance ("foo", + "bar") for "foo/bar" (no base), and ("local", "bin") for + "/usr/local/bin" if the base is "/usr". + + """ + self.conn = connection + self.index = index + self.base = base + self.path = apply(os.path.join, (base,) + self.index) + self.file = None + if data: self.data = data + else: self.setdata() + + def __str__(self): + return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index, + self.data) + + def __getstate__(self): + """Return picklable state + + The connection must be local because we can't pickle a + connection. Data and any attached file also won't be saved. + + """ + assert self.conn is Globals.local_connection + return (self.index, self.base, self.data) + + def __setstate__(self, rpath_state): + """Reproduce RPath from __getstate__ output""" + self.index, self.base, self.data = rpath_state + + def setdata(self): + """Create the data dictionary""" + statblock = self.conn.RPathStatic.tupled_lstat(self.path) + if statblock is None: + self.data = {'type':None} + return + data = {} + mode = statblock[stat.ST_MODE] + + if stat.S_ISREG(mode): + type = 'reg' + data['size'] = statblock[stat.ST_SIZE] + elif stat.S_ISDIR(mode): type = 'dir' + elif stat.S_ISCHR(mode): + type = 'dev' + data['devnums'] = ('c',) + self._getdevnums() + elif stat.S_ISBLK(mode): + type = 'dev' + data['devnums'] = ('b',) + self._getdevnums() + elif stat.S_ISFIFO(mode): type = 'fifo' + elif stat.S_ISLNK(mode): + type = 'sym' + data['linkname'] = self.conn.os.readlink(self.path) + elif stat.S_ISSOCK(mode): type = 'sock' + else: raise RPathException("Unknown type for %s" % self.path) + data['type'] = type + data['perms'] = stat.S_IMODE(mode) + data['uid'] = statblock[stat.ST_UID] + data['gid'] = statblock[stat.ST_GID] + + if not (type == 'sym' or type == 'dev'): + # mtimes on symlinks and dev files don't work consistently + data['mtime'] = long(statblock[stat.ST_MTIME]) + + if Globals.preserve_atime and not type == 'sym': + data['atime'] = long(statblock[stat.ST_ATIME]) + self.data = data + + def check_consistency(self): + """Raise an error if consistency of rp broken + + This is useful for debugging when the cache and disk get out + of sync and you need to find out where it happened. 
+ + """ + temptype = self.data['type'] + self.setdata() + assert temptype == self.data['type'], \ + "\nName: %s\nOld: %s --> New: %s\n" % \ + (self.path, temptype, self.data['type']) + + def _getdevnums(self): + """Return tuple for special file (major, minor)""" + assert self.conn is Globals.local_connection + if Globals.exclude_device_files: + # No point in finding numbers because it will be excluded anyway + return () + s = os.lstat(self.path).st_rdev + return (s >> 8, s & 0xff) + + def chmod(self, permissions): + """Wrapper around os.chmod""" + self.conn.os.chmod(self.path, permissions) + self.data['perms'] = permissions + + def settime(self, accesstime, modtime): + """Change file modification times""" + Log("Setting time of %s to %d" % (self.path, modtime), 7) + self.conn.os.utime(self.path, (accesstime, modtime)) + self.data['atime'] = accesstime + self.data['mtime'] = modtime + + def setmtime(self, modtime): + """Set only modtime (access time to present)""" + Log("Setting time of %s to %d" % (self.path, modtime), 7) + self.conn.os.utime(self.path, (time.time(), modtime)) + self.data['mtime'] = modtime + + def chown(self, uid, gid): + """Set file's uid and gid""" + self.conn.os.chown(self.path, uid, gid) + self.data['uid'] = uid + self.data['gid'] = gid + + def mkdir(self): + Log("Making directory " + self.path, 6) + self.conn.os.mkdir(self.path) + self.setdata() + + def rmdir(self): + Log("Removing directory " + self.path, 6) + self.conn.os.rmdir(self.path) + self.data = {'type': None} + + def listdir(self): + """Return list of string paths returned by os.listdir""" + return self.conn.os.listdir(self.path) + + def symlink(self, linktext): + """Make symlink at self.path pointing to linktext""" + self.conn.os.symlink(linktext, self.path) + self.setdata() + assert self.issym() + + def mkfifo(self): + """Make a fifo at self.path""" + self.conn.os.mkfifo(self.path) + self.setdata() + assert self.isfifo() + + def touch(self): + """Make sure file at self.path exists""" + Log("Touching " + self.path, 7) + self.conn.open(self.path, "w").close() + self.setdata() + assert self.isreg() + + def hasfullperms(self): + """Return true if current process has full permissions on the file""" + if self.isowner(): return self.getperms() % 01000 >= 0700 + elif self.isgroup(): return self.getperms() % 0100 >= 070 + else: return self.getperms() % 010 >= 07 + + def readable(self): + """Return true if current process has read permissions on the file""" + if self.isowner(): return self.getperms() % 01000 >= 0400 + elif self.isgroup(): return self.getperms() % 0100 >= 040 + else: return self.getperms() % 010 >= 04 + + def executable(self): + """Return true if current process has execute permissions""" + if self.isowner(): return self.getperms() % 0200 >= 0100 + elif self.isgroup(): return self.getperms() % 020 >= 010 + else: return self.getperms() % 02 >= 01 + + def isowner(self): + """Return true if current process is owner of rp or root""" + uid = self.conn.Globals.get('process_uid') + return uid == 0 or uid == self.data['uid'] + + def isgroup(self): + """Return true if current process is in group of rp""" + return self.conn.Globals.get('process_gid') == self.data['gid'] + + def delete(self): + """Delete file at self.path + + The destructive stepping allows this function to delete + directories even if they have files and we lack permissions. 
+ + """ + Log("Deleting %s" % self.path, 7) + self.setdata() + if not self.lstat(): return # must have been deleted in meantime + elif self.isdir(): + def helper(dsrp, base_init_output, branch_reduction): + if dsrp.isdir(): dsrp.rmdir() + else: dsrp.delete() + dsiter = DestructiveStepping.Iterate_from(self, None) + itm = IterTreeReducer(lambda x: None, lambda x,y: None, None, + helper) + for dsrp in dsiter: itm(dsrp) + itm.getresult() + else: self.conn.os.unlink(self.path) + self.setdata() + + def quote(self): + """Return quoted self.path for use with os.system()""" + return '"%s"' % self.regex_chars_to_quote.sub( + lambda m: "\\"+m.group(0), self.path) + + def normalize(self): + """Return RPath canonical version of self.path + + This just means that redundant /'s will be removed, including + the trailing one, even for directories. ".." components will + be retained. + + """ + newpath = "/".join(filter(lambda x: x and x != ".", + self.path.split("/"))) + if self.path[0] == "/": newpath = "/" + newpath + elif not newpath: newpath = "." + return self.__class__(self.conn, newpath, ()) + + def dirsplit(self): + """Returns a tuple of strings (dirname, basename) + + Basename is never '' unless self is root, so it is unlike + os.path.basename. If path is just above root (so dirname is + root), then dirname is ''. In all other cases dirname is not + the empty string. Also, dirsplit depends on the format of + self, so basename could be ".." and dirname could be a + subdirectory. For an atomic relative path, dirname will be + '.'. + + """ + normed = self.normalize() + if normed.path.find("/") == -1: return (".", normed.path) + comps = normed.path.split("/") + return "/".join(comps[:-1]), comps[-1] + + def append(self, ext): + """Return new RPath with same connection by adjoing ext""" + return self.__class__(self.conn, self.base, self.index + (ext,)) + + def new_index(self, index): + """Return similar RPath but with new index""" + return self.__class__(self.conn, self.base, index) + + def open(self, mode): + """Return open file. Supports modes "w" and "r".""" + return self.conn.open(self.path, mode) + + def write_from_fileobj(self, fp): + """Reads fp and writes to self.path. 
Closes both when done""" + Log("Writing file object to " + self.path, 7) + assert not self.lstat(), "File %s already exists" % self.path + outfp = self.open("wb") + RPath.copyfileobj(fp, outfp) + if fp.close() or outfp.close(): + raise RPathException("Error closing file") + self.setdata() + + def isincfile(self): + """Return true if path looks like an increment file""" + dotsplit = self.path.split(".") + if len(dotsplit) < 3: return None + timestring, ext = dotsplit[-2:] + if Time.stringtotime(timestring) is None: return None + return (ext == "snapshot" or ext == "dir" or + ext == "missing" or ext == "diff") + + def getinctype(self): + """Return type of an increment file""" + return self.path.split(".")[-1] + + def getinctime(self): + """Return timestring of an increment file""" + return self.path.split(".")[-2] + + def getincbase(self): + """Return the base filename of an increment file in rp form""" + if self.index: + return self.__class__(self.conn, self.base, self.index[:-1] + + ((".".join(self.index[-1].split(".")[:-2])),)) + else: return self.__class__(self.conn, + ".".join(self.base.split(".")[:-2]), ()) + + def getincbase_str(self): + """Return the base filename string of an increment file""" + return self.getincbase().dirsplit()[1] + + def makedev(self, type, major, minor): + """Make a special file with specified type, and major/minor nums""" + cmdlist = ['mknod', self.path, type, str(major), str(minor)] + if self.conn.os.spawnvp(os.P_WAIT, 'mknod', cmdlist) != 0: + RPathException("Error running %s" % cmdlist) + if type == 'c': datatype = 'chr' + elif type == 'b': datatype = 'blk' + else: raise RPathException + self.data = {'type': datatype, 'devnums': (type, major, minor)} + + def getRORPath(self, include_contents = None): + """Return read only version of self""" + rorp = RORPath(self.index, self.data) + if include_contents: rorp.setfile(self.open("rb")) + return rorp + + +class RPathFileHook: + """Look like a file, but add closing hook""" + def __init__(self, file, closing_thunk): + self.file = file + self.closing_thunk = closing_thunk + + def read(self, length = -1): return self.file.read(length) + def write(self, buf): return self.file.write(buf) + + def close(self): + """Close file and then run closing thunk""" + result = self.file.close() + self.closing_thunk() + return result diff --git a/rdiff-backup/src/setconnections.py b/rdiff-backup/src/setconnections.py new file mode 100644 index 0000000..07c6893 --- /dev/null +++ b/rdiff-backup/src/setconnections.py @@ -0,0 +1,205 @@ +execfile("highlevel.py") + +####################################################################### +# +# setconnections - Parse initial arguments and establish connections +# + +class SetConnectionsException(Exception): pass + +class SetConnections: + """Parse args and setup connections + + The methods in this class are used once by Main to parse file + descriptions like bescoto@folly.stanford.edu:/usr/bin/ls and to + set up the related connections. + + """ + # This is the schema that determines how rdiff-backup will open a + # pipe to the remote system. If the file is given as A:B, %s will + # be substituted with A in the schema. + __cmd_schema = 'ssh %s rdiff-backup --server' + + # This is a list of remote commands used to start the connections. + # The first is None because it is the local connection. 
+ __conn_remote_cmds = [None] + + def InitRPs(cls, arglist, remote_schema = None, remote_cmd = None): + """Map the given file descriptions into rpaths and return list""" + if remote_schema: cls.__cmd_schema = remote_schema + if not arglist: return [] + desc_pairs = map(cls.parse_file_desc, arglist) + + if filter(lambda x: x[0], desc_pairs): # True if any host_info found + if remote_cmd: + Log.FatalError("The --remote-cmd flag is not compatible " + "with remote file descriptions.") + elif remote_schema: + Log("Remote schema option ignored - no remote file " + "descriptions.", 2) + + cmd_pairs = map(cls.desc2cmd_pairs, desc_pairs) + if remote_cmd: # last file description gets remote_cmd + cmd_pairs[-1] = (remote_cmd, cmd_pairs[-1][1]) + return map(cls.cmdpair2rp, cmd_pairs) + + def cmdpair2rp(cls, cmd_pair): + """Return RPath from cmd_pair (remote_cmd, filename)""" + cmd, filename = cmd_pair + if cmd: conn = cls.init_connection(cmd) + else: conn = Globals.local_connection + return RPath(conn, filename) + + def desc2cmd_pairs(cls, desc_pair): + """Return pair (remote_cmd, filename) from desc_pair""" + host_info, filename = desc_pair + if not host_info: return (None, filename) + else: return (cls.fill_schema(host_info), filename) + + def parse_file_desc(cls, file_desc): + """Parse file description returning pair (host_info, filename) + + In other words, bescoto@folly.stanford.edu::/usr/bin/ls => + ("bescoto@folly.stanford.edu", "/usr/bin/ls"). The + complication is to allow for quoting of : by a \. If the + string is not separated by :, then the host_info is None. + + """ + def check_len(i): + if i >= len(file_desc): + raise SetConnectionsException( + "Unexpected end to file description %s" % file_desc) + + host_info_list, i, last_was_quoted = [], 0, None + while 1: + if i == len(file_desc): + return (None, file_desc) + + if file_desc[i] == '\\': + i = i+1 + check_len(i) + last_was_quoted = 1 + elif (file_desc[i] == ":" and i > 0 and file_desc[i-1] == ":" + and not last_was_quoted): + host_info_list.pop() # Remove last colon from name + break + else: last_was_quoted = None + host_info_list.append(file_desc[i]) + i = i+1 + + check_len(i+1) + return ("".join(host_info_list), file_desc[i+1:]) + + def fill_schema(cls, host_info): + """Fills host_info into the schema and returns remote command""" + return cls.__cmd_schema % host_info + + def init_connection(cls, remote_cmd): + """Run remote_cmd, register connection, and then return it + + If remote_cmd is None, then the local connection will be + returned. This also updates some settings on the remote side, + like global settings, its connection number, and verbosity. + + """ + if not remote_cmd: return Globals.local_connection + + Log("Executing " + remote_cmd, 4) + stdin, stdout = os.popen2(remote_cmd) + conn_number = len(Globals.connections) + conn = PipeConnection(stdout, stdin, conn_number) + + cls.check_connection_version(conn) + Log("Registering connection %d" % conn_number, 7) + cls.init_connection_routing(conn, conn_number, remote_cmd) + cls.init_connection_settings(conn) + return conn + + def check_connection_version(cls, conn): + """Log warning if connection has different version""" + remote_version = conn.Globals.get('version') + if remote_version != Globals.version: + Log("Warning: Local version %s does not match remote version %s." 
% (Globals.version, remote_version), 2) + + def init_connection_routing(cls, conn, conn_number, remote_cmd): + """Called by init_connection; establish routing and the conn dict""" + Globals.connection_dict[conn_number] = conn + + conn.SetConnections.init_connection_remote(conn_number) + for other_remote_conn in Globals.connections[1:]: + conn.SetConnections.add_redirected_conn( + other_remote_conn.conn_number) + other_remote_conn.SetConnections.add_redirected_conn(conn_number) + + Globals.connections.append(conn) + cls.__conn_remote_cmds.append(remote_cmd) + + def init_connection_settings(cls, conn): + """Tell new conn about log settings and updated globals""" + conn.Log.setverbosity(Log.verbosity) + conn.Log.setterm_verbosity(Log.term_verbosity) + for setting_name in Globals.changed_settings: + conn.Globals.set(setting_name, Globals.get(setting_name)) + + def init_connection_remote(cls, conn_number): + """Run on server side to record the conn_number we were given""" + Globals.connection_number = conn_number + Globals.local_connection.conn_number = conn_number + Globals.connection_dict[0] = Globals.connections[1] + Globals.connection_dict[conn_number] = Globals.local_connection + + def add_redirected_conn(cls, conn_number): + """Run on server side - tell about redirected connection""" + Globals.connection_dict[conn_number] = \ + RedirectedConnection(conn_number) + + def UpdateGlobal(cls, setting_name, val): + """Update value of global variable across all connections""" + for conn in Globals.connections: + conn.Globals.set(setting_name, val) + + def BackupInitConnections(cls, reading_conn, writing_conn): + """Backup specific connection initialization""" + reading_conn.Globals.set("isbackup_reader", 1) + writing_conn.Globals.set("isbackup_writer", 1) + cls.UpdateGlobal("backup_reader", reading_conn) + cls.UpdateGlobal("backup_writer", writing_conn) + + + def CloseConnections(cls): + """Close all connections. Run by client""" + assert not Globals.server + for conn in Globals.connections: conn.quit() + del Globals.connections[1:] # Only leave local connection + Globals.connection_dict = {0: Globals.local_connection} + Globals.backup_reader = Globals.isbackup_reader = \ + Globals.backup_writer = Globals.isbackup_writer = None + + def TestConnections(cls): + """Test connections, printing results""" + if len(Globals.connections) == 1: + print "No remote connections specified" + else: + for i in range(1, len(Globals.connections)): + cls.test_connection(i) + + def test_connection(cls, conn_number): + """Test connection.
conn_number 0 is the local connection""" + print "Testing server started by: ", \ + cls.__conn_remote_cmds[conn_number] + conn = Globals.connections[conn_number] + try: + assert conn.pow(2,3) == 8 + assert conn.os.path.join("a", "b") == "a/b" + version = conn.reval("lambda: Globals.version") + except: + sys.stderr.write("Server tests failed\n") + raise + if not version == Globals.version: + print """Server may work, but there is a version mismatch: +Local version: %s +Remote version: %s""" % (Globals.version, version) + else: print "Server OK" + +MakeClass(SetConnections) diff --git a/rdiff-backup/src/static.py b/rdiff-backup/src/static.py new file mode 100644 index 0000000..2e97cd0 --- /dev/null +++ b/rdiff-backup/src/static.py @@ -0,0 +1,30 @@ +execfile("globals.py") + +####################################################################### +# +# static - MakeStatic and MakeClass +# +# These functions are used to make all the instance methods in a class +# into static or class methods. +# + +class StaticMethodsError(Exception): + pass + +def MakeStatic(cls): + """Turn instance methods into static ones + + The methods (that don't begin with _) of the given class will be + turned into static methods. + + """ + for name in dir(cls): + if name[0] != "_": + cls.__dict__[name] = staticmethod(cls.__dict__[name]) + + +def MakeClass(cls): + """Turn instance methods into classmethods. Ignore _ like above""" + for name in dir(cls): + if name[0] != "_": + cls.__dict__[name] = classmethod(cls.__dict__[name]) diff --git a/rdiff-backup/src/ttime.py b/rdiff-backup/src/ttime.py new file mode 100644 index 0000000..c8bb58e --- /dev/null +++ b/rdiff-backup/src/ttime.py @@ -0,0 +1,129 @@ +execfile("log.py") +import time, types + +####################################################################### +# +# ttime - Provide Time class, which contains time related functions. +# + +class TimeException(Exception): pass + +class Time: + """Functions which act on the time""" + _interval_conv_dict = {"s": 1, "m": 60, "h": 3600, + "D": 86400, "M": 30*86400, "Y": 365*86400} + + def setcurtime(cls, curtime = None): + """Sets the current time in curtime and curtimestr on all systems""" + t = curtime or time.time() + for conn in Globals.connections: + conn.Time.setcurtime_local(t, cls.timetostring(t)) + + def setcurtime_local(cls, timeinseconds, timestr): + """Only set the current time locally""" + cls.curtime = timeinseconds + cls.curtimestr = timestr + + def setprevtime(cls, timeinseconds): + """Sets the previous inc time in prevtime and prevtimestr""" + assert timeinseconds > 0, timeinseconds + timestr = cls.timetostring(timeinseconds) + for conn in Globals.connections: + conn.Time.setprevtime_local(timeinseconds, timestr) + + def setprevtime_local(cls, timeinseconds, timestr): + """Like setprevtime but only set the local version""" + cls.prevtime = timeinseconds + cls.prevtimestr = timestr + + def timetostring(cls, timeinseconds): + """Return w3 datetime compliant listing of timeinseconds""" + return time.strftime("%Y-%m-%dT%H" + Globals.time_separator + + "%M" + Globals.time_separator + "%S", + time.localtime(timeinseconds)) + cls.gettzd() + + def stringtotime(cls, timestring): + """Return time in seconds from w3 timestring + + If there is an error parsing the string, or it doesn't look + like a w3 datetime string, return None.
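timetostring above and stringtotime, whose implementation follows, are inverses of each other. A hypothetical round trip (the exact string depends on the local zone and on Globals.time_separator):

    # t = 1016694163L
    # s = Time.timetostring(t)    # e.g. "2002-03-21T07:22:43-08:00" in PST
    # Time.stringtotime(s) == t   # round trip, up to DST corner cases
    # Time.stringtotime("not a w3 datetime")  ->  None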
+ + """ + try: + date, daytime = timestring[:19].split("T") + year, month, day = map(int, date.split("-")) + hour, minute, second = map(int, + daytime.split(Globals.time_separator)) + assert 1900 < year < 2100, year + assert 1 <= month <= 12 + assert 1 <= day <= 31 + assert 0 <= hour <= 23 + assert 0 <= minute <= 59 + assert 0 <= second <= 61 # leap seconds + timetuple = (year, month, day, hour, minute, second, -1, -1, -1) + if time.daylight: + utc_in_secs = time.mktime(timetuple) - time.altzone + else: utc_in_secs = time.mktime(timetuple) - time.timezone + + return utc_in_secs + cls.tzdtoseconds(timestring[19:]) + except (TypeError, ValueError, AssertionError): return None + + def timetopretty(cls, timeinseconds): + """Return pretty version of time""" + return time.asctime(time.localtime(timeinseconds)) + + def stringtopretty(cls, timestring): + """Return pretty version of time given w3 time string""" + return cls.timetopretty(cls.stringtotime(timestring)) + + def intstringtoseconds(cls, interval_string): + """Convert a string expressing an interval to seconds""" + def error(): + raise TimeException('Bad interval string "%s"' % interval_string) + if len(interval_string) < 2: error() + try: num, ext = int(interval_string[:-1]), interval_string[-1] + except ValueError: error() + if not ext in cls._interval_conv_dict or num < 0: error() + return num*cls._interval_conv_dict[ext] + + def gettzd(cls): + """Return w3's timezone identification string. + + Expresed as [+/-]hh:mm. For instance, PST is -08:00. Zone is + coincides with what localtime(), etc., use. + + """ + if time.daylight: offset = -1 * time.altzone/60 + else: offset = -1 * time.timezone/60 + if offset > 0: prefix = "+" + elif offset < 0: prefix = "-" + else: return "Z" # time is already in UTC + + hours, minutes = map(abs, divmod(offset, 60)) + assert 0 <= hours <= 23 + assert 0 <= minutes <= 59 + return "%s%02d%s%02d" % (prefix, hours, + Globals.time_separator, minutes) + + def tzdtoseconds(cls, tzd): + """Given w3 compliant TZD, return how far ahead UTC is""" + if tzd == "Z": return 0 + assert len(tzd) == 6 # only accept forms like +08:00 for now + assert (tzd[0] == "-" or tzd[0] == "+") and \ + tzd[3] == Globals.time_separator + return -60 * (60 * int(tzd[:3]) + int(tzd[4:])) + + def cmp(cls, time1, time2): + """Compare time1 and time2 and return -1, 0, or 1""" + if type(time1) is types.StringType: + time1 = cls.stringtotime(time1) + assert time1 is not None + if type(time2) is types.StringType: + time2 = cls.stringtotime(time2) + assert time2 is not None + + if time1 < time2: return -1 + elif time1 == time2: return 0 + else: return 1 + +MakeClass(Time) diff --git a/rdiff-backup/testing/chdir-wrapper b/rdiff-backup/testing/chdir-wrapper new file mode 100755 index 0000000..413fcf2 --- /dev/null +++ b/rdiff-backup/testing/chdir-wrapper @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +"""Used to emulate a remote connection by changing directories. + +If given an argument, will change to that directory, and then start +the server. Otherwise will start the server without a chdir. 
+ +""" + +execfile("commontest.py") +rbexec("setconnections.py") + +if len(sys.argv) > 1: os.chdir(sys.argv[1]) +PipeConnection(sys.stdin, sys.stdout).Server() + diff --git a/rdiff-backup/testing/commontest.py b/rdiff-backup/testing/commontest.py new file mode 100644 index 0000000..5cd66d7 --- /dev/null +++ b/rdiff-backup/testing/commontest.py @@ -0,0 +1,19 @@ +"""commontest - Some functions and constants common to all test cases""" +import os + +SourceDir = "../src" +AbsCurdir = os.getcwd() # Absolute path name of current directory +AbsTFdir = AbsCurdir+"/testfiles" +MiscDir = "../misc" + +def rbexec(src_file): + """Changes to the source directory, execfile src_file, return""" + os.chdir(SourceDir) + execfile(src_file, globals()) + os.chdir(AbsCurdir) + +def Make(): + """Make sure the rdiff-backup script in the source dir is up-to-date""" + os.chdir(SourceDir) + os.system("python ./Make") + os.chdir(AbsCurdir) diff --git a/rdiff-backup/testing/connectiontest.py b/rdiff-backup/testing/connectiontest.py new file mode 100644 index 0000000..bcd98ea --- /dev/null +++ b/rdiff-backup/testing/connectiontest.py @@ -0,0 +1,201 @@ +import unittest, types, tempfile, os, sys +execfile("commontest.py") +rbexec("setconnections.py") + + +class LocalConnectionTest(unittest.TestCase): + """Test the dummy connection""" + lc = Globals.local_connection + + def testGetAttrs(self): + """Test getting of various attributes""" + assert type(self.lc.LocalConnection) is types.ClassType + try: self.lc.asotnuhaoseu + except NameError: pass + else: unittest.fail("NameError should be raised") + + def testSetattrs(self): + """Test setting of global attributes""" + self.lc.x = 5 + assert self.lc.x == 5 + self.lc.x = 7 + assert self.lc.x == 7 + + def testDelattrs(self): + """Testing deletion of attributes""" + self.lc.x = 5 + del self.lc.x + try: self.lc.x + except NameError: pass + else: unittest.fail("No exception raised") + + def testReval(self): + """Test string evaluation""" + assert self.lc.reval("pow", 2, 3) == 8 + + +class LowLevelPipeConnectionTest(unittest.TestCase): + """Test LLPC class""" + objs = ["Hello", ("Tuple", "of", "strings"), + [1, 2, 3, 4], 53.34235] + excts = [TypeError("te"), NameError("ne"), os.error("oe")] + filename = tempfile.mktemp() + + def testObjects(self): + """Try moving objects across connection""" + outpipe = open(self.filename, "w") + LLPC = LowLevelPipeConnection(None, outpipe) + for obj in self.objs: LLPC._putobj(obj, 3) + outpipe.close() + inpipe = open(self.filename, "r") + LLPC.inpipe = inpipe + for obj in self.objs: + gotten = LLPC._get() + assert gotten == (3, obj), gotten + inpipe.close + os.unlink(self.filename) + + def testBuf(self): + """Try moving a buffer""" + outpipe = open(self.filename, "w") + LLPC = LowLevelPipeConnection(None, outpipe) + inbuf = open("testfiles/various_file_types/regular_file", "r").read() + LLPC._putbuf(inbuf, 234) + outpipe.close() + inpipe = open(self.filename, "r") + LLPC.inpipe = inpipe + assert (234, inbuf) == LLPC._get() + inpipe.close() + os.unlink(self.filename) + + def testSendingExceptions(self): + """Exceptions should also be sent down pipe well""" + outpipe = open(self.filename, "w") + LLPC = LowLevelPipeConnection(None, outpipe) + for exception in self.excts: LLPC._putobj(exception, 0) + outpipe.close() + inpipe = open(self.filename, "r") + LLPC.inpipe = inpipe + for exception in self.excts: + incoming_exception = LLPC._get() + assert isinstance(incoming_exception[1], exception.__class__) + inpipe.close() + os.unlink(self.filename) + + 
+class PipeConnectionTest(unittest.TestCase): + """Test Pipe connection""" + regfilename = "testfiles/various_file_types/regular_file" + + def setUp(self): + """Must start a server for this""" + stdin, stdout = os.popen2("./server.py") + self.conn = PipeConnection(stdout, stdin) + #self.conn.Log.setverbosity(9) + #Log.setverbosity(9) + + def testBasic(self): + """Test some basic pipe functions""" + assert self.conn.ord("a") == 97 + assert self.conn.pow(2,3) == 8 + assert self.conn.reval("ord", "a") == 97 + + def testModules(self): + """Test module emulation""" + assert type(self.conn.tempfile.mktemp()) is types.StringType + assert self.conn.os.path.join("a", "b") == "a/b" + rp1 = RPath(self.conn, self.regfilename) + assert rp1.isreg() + + def testVirtualFiles(self): + """Testing virtual files""" + tempout = self.conn.open("testfiles/tempout", "w") + assert isinstance(tempout, VirtualFile) + regfilefp = open(self.regfilename, "r") + RPath.copyfileobj(regfilefp, tempout) + tempout.close() + regfilefp.close() + tempoutlocal = open("testfiles/tempout", "r") + regfilefp = open(self.regfilename, "r") + assert RPath.cmpfileobj(regfilefp, tempoutlocal) + tempoutlocal.close() + regfilefp.close() + os.unlink("testfiles/tempout") + + assert RPath.cmpfileobj(self.conn.open(self.regfilename, "r"), + open(self.regfilename, "r")) + + def testString(self): + """Test transmitting strings""" + assert "32" == self.conn.str(32) + assert 32 == self.conn.int("32") + + def testIterators(self): + """Test transmission of iterators""" + i = iter(map(RORPsubstitute, range(10))) + assert self.conn.hasattr(i, "next") + datastring = self.conn.reval("lambda i: i.next().data", i) + assert datastring == "Hello, there 0", datastring + + def testRPaths(self): + """Test transmission of rpaths""" + rp = RPath(self.conn, "testfiles/various_file_types/regular_file") + assert self.conn.reval("lambda rp: rp.data", rp) == rp.data + assert self.conn.reval("lambda rp: rp.conn is Globals.local_connection", rp) + + def testExceptions(self): + """Test exceptional results""" + self.assertRaises(os.error, self.conn.os.lstat, + "asoeut haosetnuhaoseu tn") + self.assertRaises(SyntaxError, self.conn.reval, + "aoetnsu aoehtnsu") + assert self.conn.pow(2,3) == 8 + + def tearDown(self): + """Bring down connection""" + self.conn.quit() + + +class RedirectedConnectionTest(unittest.TestCase): + """Test routing and redirection""" + def setUp(self): + """Must start two servers for this""" + #Log.setverbosity(9) + self.conna = SetConnections.init_connection("./server.py") + self.connb = SetConnections.init_connection("./server.py") + + def testBasic(self): + """Test basic operations with redirection""" + self.conna.Globals.set("tmp_connb", self.connb) + self.connb.Globals.set("tmp_conna", self.conna) + assert self.conna.Globals.get("tmp_connb") is self.connb + assert self.connb.Globals.get("tmp_conna") is self.conna + + self.conna.Test_SetConnGlobals(self.connb, "tmp_settest", 1) + assert self.connb.Globals.get("tmp_settest") + + assert self.conna.reval("Globals.get('tmp_connb').pow", 2, 3) == 8 + self.conna.reval("Globals.tmp_connb.reval", + "Globals.tmp_conna.Globals.set", "tmp_marker", 5) + assert self.conna.Globals.get("tmp_marker") == 5 + + def testRpaths(self): + """Test moving rpaths back and forth across connections""" + rp = RPath(self.conna, "foo") + self.connb.Globals.set("tmp_rpath", rp) + rp_returned = self.connb.Globals.get("tmp_rpath") + assert rp_returned.conn is rp.conn + assert rp_returned.path == rp.path + + def tearDown(self): + 
SetConnections.CloseConnections() + +class RORPsubstitute: + """Used in testIterators above to simulate a RORP""" + def __init__(self, i): + self.index = i + self.data = "Hello, there %d" % i + self.file = None + +if __name__ == "__main__": + unittest.main() diff --git a/rdiff-backup/testing/destructive_steppingtest.py b/rdiff-backup/testing/destructive_steppingtest.py new file mode 100644 index 0000000..528561d --- /dev/null +++ b/rdiff-backup/testing/destructive_steppingtest.py @@ -0,0 +1,72 @@ +from __future__ import generators +import unittest +execfile("commontest.py") +rbexec("destructive_stepping.py") + + + +class DSTest(unittest.TestCase): + def setUp(self): + self.lc = Globals.local_connection + self.noperms = RPath(self.lc, "testfiles/noperms") + Globals.change_source_perms = 1 + self.iteration_dir = RPath(self.lc, "testfiles/iteration-test") + + def testDSIter(self): + """Testing destructive stepping iterator from baserp""" + for i in range(2): + ds_iter = DestructiveStepping.Iterate_with_Finalizer( + self.noperms, 1) + noperms = ds_iter.next() + assert noperms.isdir() and noperms.getperms() == 0 + + bar = ds_iter.next() + assert bar.isreg() and bar.getperms() == 0 + barbuf = bar.open("rb").read() + assert len(barbuf) > 0 + + foo = ds_iter.next() + assert foo.isreg() and foo.getperms() == 0 + assert foo.getmtime() < 1000300000 + + fuz = ds_iter.next() + assert fuz.isreg() and fuz.getperms() == 0200 + fuzbuf = fuz.open("rb").read() + assert len(fuzbuf) > 0 + + self.assertRaises(StopIteration, ds_iter.next) + + def testIterate_from(self): + """Tests basic iteration by Iterate_from""" + iter = DestructiveStepping.Iterate_from(self.iteration_dir, 1) + l = [] + for rp in iter: l.append(rp.index) + assert l == [(), + ('1',), + ('2',), + ('3',), ('3','2'), ('3','3'), + ('4',), + ('5',), ('5','1'), ('5','2'), ('5','2','1'), + ('6',), ('6','3'), + ('6','3','1'), ('6','3','2'), ('6','4'), + ('7',)], l + + def testIterate_from_index(self): + """Test iteration from a given index""" + iter = DestructiveStepping.Iterate_from(self.iteration_dir, 1, ('3',)) + l = [] + for rp in iter: l.append(rp.index) + assert l == [('3','2'), ('3','3'), + ('4',), + ('5',), ('5','1'), ('5','2'), ('5','2','1'), + ('6',), ('6','3'), + ('6','3','1'), ('6','3','2'), ('6','4'), + ('7',)], l + iter = DestructiveStepping.Iterate_from(self.iteration_dir, 1, + ('6','3')) + l = [] + for rp in iter: l.append(rp.index) + assert l == [('6','3','1'), ('6','3','2'), ('6', '4'), + ('7',)], l + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/filelisttest.py b/rdiff-backup/testing/filelisttest.py new file mode 100644 index 0000000..f6166ed --- /dev/null +++ b/rdiff-backup/testing/filelisttest.py @@ -0,0 +1,35 @@ +import unittest, StringIO +execfile("commontest.py") +rbexec("filelist.py") + + +class FilelistTest(unittest.TestCase): + """Test Filelist class""" + def testFile2Iter(self): + """Test File2Iter function""" + filelist = """ +hello +goodbye +a/b/c + +test""" + baserp = RPath(Globals.local_connection, "/base") + i = Filelist.File2Iter(StringIO.StringIO(filelist), baserp) + assert i.next().path == "/base/hello" + assert i.next().path == "/base/goodbye" + assert i.next().path == "/base/a/b/c" + assert i.next().path == "/base/test" + self.assertRaises(StopIteration, i.next) + + def testmake_subdirs(self): + """Test Filelist.make_subdirs""" + self.assertRaises(os.error, os.lstat, "foo_delete_me") + Filelist.make_subdirs(RPath(Globals.local_connection, + "foo_delete_me/a/b/c/d")) + 
os.lstat("foo_delete_me") + os.lstat("foo_delete_me/a") + os.lstat("foo_delete_me/a/b") + os.lstat("foo_delete_me/a/b/c") + os.system("rm -rf foo_delete_me") + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/finaltest.py b/rdiff-backup/testing/finaltest.py new file mode 100644 index 0000000..c92a7d1 --- /dev/null +++ b/rdiff-backup/testing/finaltest.py @@ -0,0 +1,150 @@ +import unittest, os, re, sys +execfile("commontest.py") +rbexec("restore.py") + +"""Regression tests""" + +Globals.exclude_mirror_regexps = [re.compile(".*/rdiff-backup-data")] +Log.setverbosity(7) +Make() + +lc = Globals.local_connection + +class Local: + """This is just a place to put increments relative to the local + connection""" + def get_local_rp(extension): + return RPath(Globals.local_connection, "testfiles/" + extension) + + inc1rp = get_local_rp('increment1') + inc2rp = get_local_rp('increment2') + inc3rp = get_local_rp('increment3') + inc4rp = get_local_rp('increment4') + + rpout = get_local_rp('output') + rpout_inc = get_local_rp('output_inc') + rpout1 = get_local_rp('restoretarget1') + rpout2 = get_local_rp('restoretarget2') + rpout3 = get_local_rp('restoretarget3') + rpout4 = get_local_rp('restoretarget4') + + prefix = get_local_rp('.') + + vft_in = get_local_rp('vft_out') + vft_out = get_local_rp('increment2/various_file_types') + + timbar_in = get_local_rp('increment1/timbar.pyc') + timbar_out = get_local_rp('../timbar.pyc') # in cur directory + +class PathSetter(unittest.TestCase): + def setUp(self): + self.rb_schema = SourceDir + \ + "/rdiff-backup -v5 --remote-schema './chdir-wrapper %s' " + + def refresh(self, *rp_list): + """Reread data for the given rps""" + for rp in rp_list: rp.setdata() + + def set_connections(self, src_pre, src_back, dest_pre, dest_back): + """Set source and destination prefixes""" + if src_pre: self.src_prefix = "%s::%s" % (src_pre, src_back) + else: self.src_prefix = './' + + if dest_pre: self.dest_prefix = "%s::%s" % (dest_pre, dest_back) + else: self.dest_prefix = './' + + def exec_rb(self, *args): + """Run rdiff-backup on given arguments""" + arglist = [] + arglist.append(self.src_prefix + args[0]) + if len(args) > 1: + arglist.append(self.dest_prefix + args[1]) + assert len(args) == 2 + + cmdstr = self.rb_schema + ' '.join(arglist) + print "executing " + cmdstr + assert not os.system(cmdstr) + + def runtest(self): + # Deleting previous output + assert not os.system(MiscDir + '/myrm testfiles/output* ' + 'testfiles/restoretarget* testfiles/vft_out ' + 'timbar.pyc') + + # Backing up increment1 + self.exec_rb('testfiles/increment1', 'testfiles/output') + assert RPathStatic.cmp_recursive(Local.inc1rp, Local.rpout) + time.sleep(1) + + # Backing up increment2 + self.exec_rb('testfiles/increment2', 'testfiles/output') + assert RPathStatic.cmp_recursive(Local.inc2rp, Local.rpout) + time.sleep(1) + + # Backing up increment3 + self.exec_rb('testfiles/increment3', 'testfiles/output') + assert RPathStatic.cmp_recursive(Local.inc3rp, Local.rpout) + time.sleep(1) + + # Backing up increment4 + self.exec_rb('testfiles/increment4', 'testfiles/output') + assert RPathStatic.cmp_recursive(Local.inc4rp, Local.rpout) + + # Getting restore rps + inc_paths = self.getinc_paths("increments.", + "testfiles/output/rdiff-backup-data") + assert len(inc_paths) == 3 + + # Restoring increment1 + self.exec_rb(inc_paths[0], 'testfiles/restoretarget1') + assert RPathStatic.cmp_recursive(Local.inc1rp, Local.rpout1) + + # Restoring increment2 + self.exec_rb(inc_paths[1], 
'testfiles/restoretarget2') + assert RPathStatic.cmp_recursive(Local.inc2rp, Local.rpout2) + + # Restoring increment3 + self.exec_rb(inc_paths[2], 'testfiles/restoretarget3') + assert RPathStatic.cmp_recursive(Local.inc3rp, Local.rpout3) + + # Test restoration of a few random files + vft_paths = self.getinc_paths("various_file_types.", + "testfiles/output/rdiff-backup-data/increments") + self.exec_rb(vft_paths[1], 'testfiles/vft_out') + self.refresh(Local.vft_in, Local.vft_out) + assert RPathStatic.cmp_recursive(Local.vft_in, Local.vft_out) + + timbar_paths = self.getinc_paths("timbar.pyc.", + "testfiles/output/rdiff-backup-data/increments") + self.exec_rb(timbar_paths[0]) + self.refresh(Local.timbar_in, Local.timbar_out) + assert RPath.cmp_with_attribs(Local.timbar_in, Local.timbar_out) + + # Make sure too many increment files not created + assert len(self.getinc_paths("nochange.", + "testfiles/output/rdiff-backup-data/increments")) == 0 + assert len(self.getinc_paths("", + "testfiles/output/rdiff-backup-data/increments/nochange")) == 0 + + def getinc_paths(self, basename, directory): + """Return increment.______.dir paths""" + incfiles = filter(lambda s: s.startswith(basename), + os.listdir(directory)) + incfiles.sort() + incrps = map(lambda f: RPath(lc, directory+"/"+f), incfiles) + return map(lambda x: x.path, filter(RPath.isincfile, incrps)) + + +class Final(PathSetter): + def testLocal(self): + """Run test sequence everything local""" + self.set_connections(None, None, None, None) + self.runtest() + + def testRemoteAll(self): + """Run test sequence everything remote""" + self.set_connections("test1/", '../', 'test2/tmp/', '../../') + self.runtest() + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/find-max-ram.py b/rdiff-backup/testing/find-max-ram.py new file mode 100755 index 0000000..41685f1 --- /dev/null +++ b/rdiff-backup/testing/find-max-ram.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +"""find-max-ram - Returns the maximum amount of memory used by a program. + +Every half second, run ps with the appropriate commands, getting the +size of the program. Return max value. + +""" + +import os, sys, time + +def get_val(cmdstr): + """Runs ps and gets sum rss for processes making cmdstr + + Returns None if process not found. + + """ + cmd = ("ps -Ao cmd -o rss | grep '%s' | grep -v grep" % cmdstr) +# print "Running ", cmd + fp = os.popen(cmd) + lines = fp.readlines() + fp.close() + + if not lines: return None + else: return reduce(lambda x,y: x+y, map(read_ps_line, lines)) + +def read_ps_line(psline): + """Given a specially formatted line by ps, return rss value""" + l = psline.split() + assert len(l) >= 2 # first few are name, last one is rss + return int(l[-1]) + + +def main(cmdstr): + while get_val(cmdstr) is None: time.sleep(0.5) + + current_max = 0 + while 1: + rss = get_val(cmdstr) + print rss + if rss is None: break + current_max = max(current_max, rss) + time.sleep(0.5) + + print current_max + + +if __name__=="__main__": + + if len(sys.argv) != 2: + print """Usage: find-max-ram [command string] + + It will then run ps twice a second and keep totalling how much RSS + (resident set size) the process(es) whose ps command name contain the + given string use up. When there are no more processes found, it will + print the number and exit. 
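For reference, read_ps_line consumes lines like the following (numbers hypothetical), keeping only the trailing RSS field, which get_val then sums over all matching processes:

    # $ ps -Ao cmd -o rss | grep rdiff-backup | grep -v grep
    # /usr/bin/python rdiff-backup --server        10244
    # read_ps_line(...) -> 10244   (resident set size in kilobytes)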
+ """ + sys.exit(1) + else: main(sys.argv[1]) + diff --git a/rdiff-backup/testing/highleveltest.py b/rdiff-backup/testing/highleveltest.py new file mode 100644 index 0000000..b1e6f8d --- /dev/null +++ b/rdiff-backup/testing/highleveltest.py @@ -0,0 +1,75 @@ +import unittest + +execfile("commontest.py") +rbexec("setconnections.py") + + +class RemoteMirrorTest(unittest.TestCase): + """Test mirroring""" + def setUp(self): + """Start server""" + Log.setverbosity(7) + Globals.change_source_perms = 1 + self.conn = SetConnections.init_connection("./server.py") + + self.inrp = RPath(Globals.local_connection, "testfiles/various_file_types") + self.outrp = RPath(self.conn, "testfiles/output") + self.rbdir = RPath(self.conn, "testfiles/output/rdiff-backup-data") + SetConnections.UpdateGlobal('rbdir', self.rbdir) + self.inc1 = RPath(Globals.local_connection, "testfiles/increment1") + self.inc2 = RPath(Globals.local_connection, "testfiles/increment2") + self.inc3 = RPath(Globals.local_connection, "testfiles/increment3") + self.inc4 = RPath(Globals.local_connection, "testfiles/increment4") + + SetConnections.BackupInitConnections(Globals.local_connection, + self.conn) + SetConnections.UpdateGlobal('checkpoint_interval', 3) + + def testMirror(self): + """Testing simple mirror""" + if self.outrp.lstat(): self.outrp.delete() + HighLevel.Mirror(self.inrp, self.outrp, None) + self.outrp.setdata() + assert RPath.cmp_recursive(self.inrp, self.outrp) + + def testMirror2(self): + """Test mirror with larger data set""" + if self.outrp.lstat(): self.outrp.delete() + for rp in [self.inc1, self.inc2, self.inc3, self.inc4]: + rp.setdata() + print "----------------- Starting ", rp.path + HighLevel.Mirror(rp, self.outrp, None) + #if rp is self.inc2: assert 0 + assert RPath.cmp_recursive(rp, self.outrp) + self.outrp.setdata() + + def testMirrorWithCheckpointing(self): + """Like testMirror but this time checkpoint""" + if self.outrp.lstat(): self.outrp.delete() + self.outrp.mkdir() + self.rbdir.mkdir() + Globals.add_regexp("testfiles/output/rdiff-backup-data", 1) + Time.setcurtime() + SaveState.init_filenames(None) + HighLevel.Mirror(self.inrp, self.outrp, 1) + self.outrp.setdata() + assert RPath.cmp_recursive(self.inrp, self.outrp) + + def testMirrorWithCheckpointing2(self): + """Larger data set""" + if self.outrp.lstat(): os.system(MiscDir+"/myrm %s" % self.outrp.path) + self.outrp.setdata() + self.outrp.mkdir() + self.rbdir.mkdir() + Globals.add_regexp("testfiles/output/rdiff-backup-data", 1) + Time.setcurtime() + SaveState.init_filenames(None) + for rp in [self.inc1, self.inc2, self.inc3, self.inc4]: + print "----------------- Starting ", rp.path + HighLevel.Mirror(rp, self.outrp, 1) + assert RPath.cmp_recursive(rp, self.outrp) + + def tearDown(self): SetConnections.CloseConnections() + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/incrementtest.py b/rdiff-backup/testing/incrementtest.py new file mode 100644 index 0000000..d6cd17f --- /dev/null +++ b/rdiff-backup/testing/incrementtest.py @@ -0,0 +1,100 @@ +import unittest, os + +execfile("commontest.py") +rbexec("increment.py") + + +lc = Globals.local_connection +Globals.change_source_perms = 1 +Log.setverbosity(7) + +def getrp(ending): + return RPath(lc, "testfiles/various_file_types/" + ending) + +rf = getrp("regular_file") +exec1 = getrp("executable") +exec2 = getrp("executable2") +sig = getrp("regular_file.sig") +hl1, hl2 = map(getrp, ["two_hardlinked_files1", "two_hardlinked_files2"]) +test = getrp("test") +dir = getrp(".") +sym 
= getrp("symbolic_link") +nothing = getrp("nothing") + +target = RPath(lc, "testfiles/out") + +Time.setprevtime(999424113.24931) +prevtimestr = "2001-09-02T02:48:33-07:00" +t_pref = "testfiles/out.2001-09-02T02:48:33-07:00" +t_diff = "testfiles/out.2001-09-02T02:48:33-07:00.diff" + +class inctest(unittest.TestCase): + """Test the incrementRP function""" + def setUp(self): + pass + + def testreg(self): + """Test increment of regular files""" + Inc.Increment(rf, exec1, target) + rpd = RPath(lc, t_diff) + assert rpd.isreg() + assert RPath.cmp_attribs(rpd, exec1) + rpd.delete() + + def testmissing(self): + """Test creation of missing files""" + Inc.Increment(rf, nothing, target) + rp = RPath(lc, t_pref + ".missing") + assert rp.lstat() + rp.delete() + + def testsnapshot(self): + """Test making of a snapshot""" + Inc.Increment(rf, sym, target) + rp = RPath(lc, t_pref + ".snapshot") + assert rp.lstat() + assert RPath.cmp_attribs(rp, sym) + assert RPath.cmp(rp, sym) + rp.delete() + + Inc.Increment(sym, rf, target) + rp = RPath(lc, t_pref + ".snapshot") + assert rp.lstat() + assert RPath.cmp_attribs(rp, rf) + assert RPath.cmp(rp, rf) + rp.delete() + + def testdir(self): + """Test increment on dir""" + Inc.Increment(sym, dir, target) + rp = RPath(lc, t_pref + ".dir") + rp2 = RPath(lc, t_pref) + assert rp.lstat() + assert target.isdir() + assert RPath.cmp_attribs(dir, rp) + assert rp.isreg() + rp.delete() + target.delete() + + +inc1rp = RPath(lc, "testfiles/increment1") +inc2rp = RPath(lc, "testfiles/increment2") +inc3rp = RPath(lc, "testfiles/increment3") +inc4rp = RPath(lc, "testfiles/increment4") +rpout = RPath(lc, "testfiles/output") + +#class IncTreeTest(unittest.TestCase): +# def setUp(self): +# os.system("./myrm testfiles/output*") + +# def testinctree(self): +# """Test tree incrementing""" +# rpt1 = RPTriple(inc2rp, inc1rp, rpout) +# rpt2 = RPTriple(inc3rp, inc2rp, rpout) +# rpt3 = RPTriple(inc4rp, inc3rp, rpout) +# for rpt in [rpt1, rpt2, rpt3]: +# Time.setprevtime(Time.prevtime + 10000) +# Inc.IncrementTTree(TripleTree(rpt).destructive_stepping()) +# Time.setprevtime(999424113.24931) + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/iterfiletest.py b/rdiff-backup/testing/iterfiletest.py new file mode 100644 index 0000000..38dca4d --- /dev/null +++ b/rdiff-backup/testing/iterfiletest.py @@ -0,0 +1,27 @@ +import unittest, StringIO +execfile("commontest.py") +rbexec("iterfile.py") + + +class testIterFile(unittest.TestCase): + def setUp(self): + self.iter1maker = lambda: iter(range(50)) + self.iter2maker = lambda: iter(map(str, range(50))) + + def testConversion(self): + """Test iter to file conversion""" + for itm in [self.iter1maker, self.iter2maker]: + assert Iter.equal(itm(), + IterWrappingFile(FileWrappingIter(itm()))) + +class testBufferedRead(unittest.TestCase): + def testBuffering(self): + """Test buffering a StringIO""" + fp = StringIO.StringIO("12345678"*10000) + bfp = BufferedRead(fp) + assert bfp.read(5) == "12345" + assert bfp.read(4) == "6781" + assert len(bfp.read(75000)) == 75000 + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/lazytest.py b/rdiff-backup/testing/lazytest.py new file mode 100644 index 0000000..fb464a1 --- /dev/null +++ b/rdiff-backup/testing/lazytest.py @@ -0,0 +1,326 @@ +from __future__ import generators +import unittest + +execfile("commontest.py") +rbexec("lazy.py") + +class Iterators(unittest.TestCase): + one_to_100 = lambda s: iter(range(1, 101)) + evens = lambda s: iter(range(2, 101, 2)) + odds 
= lambda s: iter(range(1, 100, 2)) + empty = lambda s: iter([]) + + def __init__(self, *args): + apply (unittest.TestCase.__init__, (self,) + args) + self.falseerror = self.falseerror_maker() + self.trueerror = self.trueerror_maker() + self.emptygen = self.emptygen_maker() + self.typeerror = self.typeerror_maker() + self.nameerror = self.nameerror_maker() + + def falseerror_maker(self): + yield None + yield 0 + yield [] + raise Exception + + def trueerror_maker(self): + yield 1 + yield "hello" + yield (2, 3) + raise Exception + + def nameerror_maker(self): + if 0: yield 1 + raise NameError + + def typeerror_maker(self): + yield 1 + yield 2 + raise TypeError + + def alwayserror(self, x): + raise Exception + + def emptygen_maker(self): + if 0: yield 1 + + +class IterEqualTestCase(Iterators): + """Tests for iter_equal function""" + def testEmpty(self): + """Empty iterators should be equal""" + assert Iter.equal(self.empty(), iter([])) + + def testNormal(self): + """See if normal iterators are equal""" + assert Iter.equal(iter((1,2,3)), iter((1,2,3))) + assert Iter.equal(self.odds(), iter(range(1, 100, 2))) + assert Iter.equal(iter((1,2,3)), iter(range(1, 4))) + + def testNormalInequality(self): + """See if normal unequals work""" + assert not Iter.equal(iter((1,2,3)), iter((1,2,4))) + assert not Iter.equal(self.odds(), iter(["hello", "there"])) + + def testGenerators(self): + """equals works for generators""" + def f(): + yield 1 + yield "hello" + def g(): + yield 1 + yield "hello" + assert Iter.equal(f(), g()) + + def testLength(self): + """Differently sized iterators""" + assert not Iter.equal(iter((1,2,3)), iter((1,2))) + assert not Iter.equal(iter((1,2)), iter((1,2,3))) + + +class FilterTestCase(Iterators): + """Tests for lazy_filter function""" + def testEmpty(self): + """empty iterators -> empty iterators""" + assert Iter.empty(Iter.filter(self.alwayserror, + self.empty())), \ + "Filtering an empty iterator should result in empty iterator" + + def testNum1(self): + """Test numbers 1 - 100 #1""" + assert Iter.equal(Iter.filter(lambda x: x % 2 == 0, + self.one_to_100()), + self.evens()) + assert Iter.equal(Iter.filter(lambda x: x % 2, + self.one_to_100()), + self.odds()) + + def testError(self): + """Should raise appropriate error""" + i = Iter.filter(lambda x: x, self.falseerror_maker()) + self.assertRaises(Exception, i.next) + + +class MapTestCase(Iterators): + """Test mapping of iterators""" + def testNumbers(self): + """1 to 100 * 2 = 2 to 200""" + assert Iter.equal(Iter.map(lambda x: 2*x, self.one_to_100()), + iter(range(2, 201, 2))) + + def testShortcut(self): + """Map should go in order""" + def f(x): + if x == "hello": + raise NameError + i = Iter.map(f, self.trueerror_maker()) + i.next() + self.assertRaises(NameError, i.next) + + def testEmpty(self): + """Map of an empty iterator is empty""" + assert Iter.empty(Iter.map(lambda x: x, iter([]))) + + +class CatTestCase(Iterators): + """Test concatenation of iterators""" + def testEmpty(self): + """Empty + empty = empty""" + assert Iter.empty(Iter.cat(iter([]), iter([]))) + + def testNumbers(self): + """1 to 50 + 51 to 100 = 1 to 100""" + assert Iter.equal(Iter.cat(iter(range(1, 51)), iter(range(51, 101))), + self.one_to_100()) + + def testShortcut(self): + """Process iterators in order""" + i = Iter.cat(self.typeerror_maker(), self.nameerror_maker()) + i.next() + i.next() + self.assertRaises(TypeError, i.next) + + +class AndOrTestCase(Iterators): + """Test And and Or""" + def testEmpty(self): + """And() -> true, Or() -> false""" + 
assert Iter.And(self.empty()) + assert not Iter.Or(self.empty()) + + def testAndShortcut(self): + """And should return if any false""" + assert Iter.And(self.falseerror_maker()) is None + + def testOrShortcut(self): + """Or should return if any true""" + assert Iter.Or(self.trueerror_maker()) == 1 + + def testNormalAnd(self): + """And should go through true iterators, picking last""" + assert Iter.And(iter([1,2,3,4])) == 4 + self.assertRaises(Exception, Iter.And, self.trueerror_maker()) + + def testNormalOr(self): + """Or goes through false iterators, picking last""" + assert Iter.Or(iter([0, None, []])) == [] + self.assertRaises(Exception, Iter.Or, self.falseerror_maker()) + + +class FoldingTest(Iterators): + """Test folding operations""" + def f(self, x, y): return x + y + + def testEmpty(self): + """Folds of empty iterators should produce defaults""" + assert Iter.foldl(self.f, 23, self.empty()) == 23 + assert Iter.foldr(self.f, 32, self.empty()) == 32 + + def testAddition(self): + """Use folds to sum lists""" + assert Iter.foldl(self.f, 0, self.one_to_100()) == 5050 + assert Iter.foldr(self.f, 0, self.one_to_100()) == 5050 + + def testLargeAddition(self): + """Folds on 10000 element iterators""" + assert Iter.foldl(self.f, 0, iter(range(1, 10001))) == 50005000 + self.assertRaises(RuntimeError, + Iter.foldr, self.f, 0, iter(range(1, 10001))) + + def testLen(self): + """Use folds to calculate length of lists""" + assert Iter.foldl(lambda x, y: x+1, 0, self.evens()) == 50 + assert Iter.foldr(lambda x, y: y+1, 0, self.odds()) == 50 + +class MultiplexTest(Iterators): + def testSingle(self): + """Test multiplex single stream""" + i_orig = self.one_to_100() + i2_orig = self.one_to_100() + i = Iter.multiplex(i_orig, 1)[0] + assert Iter.equal(i, i2_orig) + + def testTriple(self): + """Test splitting iterator into three""" + counter = [0] + def ff(x): counter[0] += 1 + i_orig = self.one_to_100() + i2_orig = self.one_to_100() + i1, i2, i3 = Iter.multiplex(i_orig, 3, ff) + assert Iter.equal(i1, i2) + assert Iter.equal(i3, i2_orig) + assert counter[0] == 100, counter + + def testDouble(self): + """Test splitting into two...""" + i1, i2 = Iter.multiplex(self.one_to_100(), 2) + assert Iter.equal(i1, self.one_to_100()) + assert Iter.equal(i2, self.one_to_100()) + + +class index: + """This is just used below to test the iter tree reducer""" + def __init__(self, index): + self.index = index + + +class TreeReducerTest(unittest.TestCase): + def setUp(self): + self.i1 = iter(map(index, [(), (1,), (2,), (3,)])) + self.i2 = iter(map(index, [(0,), (0,1), (0,1,0), (0,1,1), + (0,2), (0,2,1), (0,3)])) + + self.i1a = iter(map(index, [(), (1,)])) + self.i1b = iter(map(index, [(2,), (3,)])) + self.i2a = iter(map(index, [(0,), (0,1), (0,1,0)])) + self.i2b = iter(map(index, [(0,1,1), (0,2)])) + self.i2c = iter(map(index, [(0,2,1), (0,3)])) + + # The four following are used to make an ITR later + def number_of_index(self, index_obj): + if not index_obj.index: return 0 + else: return index_obj.index[-1] + + def sum_index(self, index_obj): + return reduce(lambda x,y: x+y, index_obj.index, 0) + + def add2(self, x, y): + #print "adding %d and %d" % (x,y) + return x+y + + def add3(self, x,y,z): + #print "ignoring %s, adding %d and %d" % (x,y,z) + return y+z + + def testTreeReducer(self): + """testing IterTreeReducer""" + itm = IterTreeReducer(self.number_of_index, self.add2, 0, self.add3) + for elem in self.i1: + val = itm(elem) + assert val, elem.index + itm.calculate_final_val() + assert itm.getresult() == 6,
itm.getresult() + + itm2 = IterTreeReducer(self.sum_index, self.add2, 0, self.add3) + for elem in self.i2: + val = itm2(elem) + if elem.index == (): assert not val + else: assert val + assert itm2.getresult() == 12, itm2.getresult() + + def testTreeReducerState(self): + """Test saving and recreation of an IterTreeReducer""" + itm1a = IterTreeReducer(self.number_of_index, self.add2, 0, self.add3) + for elem in self.i1a: + val = itm1a(elem) + assert val, elem.index + itm1b = IterTreeReducer(self.number_of_index, self.add2, 0, self.add3, + itm1a.getstate()) + for elem in self.i1b: + val = itm1b(elem) + assert val, elem.index + itm1b.calculate_final_val() + assert itm1b.getresult() == 6, itm1b.getresult() + + itm2a = IterTreeReducer(self.sum_index, self.add2, 0, self.add3) + for elem in self.i2a: + val = itm2a(elem) + if elem.index == (): assert not val + else: assert val + itm2b = IterTreeReducer(self.sum_index, self.add2, 0, self.add3, + itm2a.getstate()) + for elem in self.i2b: + val = itm2b(elem) + if elem.index == (): assert not val + else: assert val + itm2c = IterTreeReducer(self.sum_index, self.add2, 0, self.add3, + itm2b.getstate()) + for elem in self.i2c: + val = itm2c(elem) + if elem.index == (): assert not val + else: assert val + assert itm2c.getresult() == 12, itm2c.getresult() + + def testTreeReducer2(self): + """Another test of the tree reducer""" + assert Iter.len(self.i1) == 4 + + hit_021_02 = [None, None] + def helper(indexobj, elem_init, branch_result): + if indexobj.index == (0,2): + assert hit_021_02[0] + hit_021_02[1] = 1 + elif indexobj.index == (0,2,1): + assert not hit_021_02[1] + hit_021_02[0] = 1 + return None + itm = IterTreeReducer(lambda x: None, lambda x,y: None, None, helper) + + for elem in self.i2: itm(elem) + itm.getresult() + assert hit_021_02 == [1,1] + + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/rdifftest.py b/rdiff-backup/testing/rdifftest.py new file mode 100644 index 0000000..471eab7 --- /dev/null +++ b/rdiff-backup/testing/rdifftest.py @@ -0,0 +1,127 @@ +import unittest, random + +execfile("commontest.py") +rbexec("destructive_stepping.py") + + +Log.setverbosity(6) + +def MakeRandomFile(path): + """Writes a random file of length between 5000 and 30000""" + fp = open(path, "w") + randseq = [] + for i in xrange(random.randrange(5000, 30000)): + randseq.append(chr(random.randrange(256))) + fp.write("".join(randseq)) + fp.close() + + +class RdiffTest(unittest.TestCase): + """Test rdiff""" + lc = Globals.local_connection + basis = RPath(lc, "testfiles/basis") + new = RPath(lc, "testfiles/new") + output = RPath(lc, "testfiles/output") + delta = RPath(lc, "testfiles/delta") + signature = RPath(lc, "testfiles/signature") + + def testRdiffSig(self): + """Test making rdiff signatures""" + sig = RPath(self.lc, "testfiles/various_file_types/regular_file.sig") + sigfp = sig.open("r") + rfsig = Rdiff.get_signature(RPath(self.lc, "testfiles/various_file_types/regular_file")) + assert RPath.cmpfileobj(sigfp, rfsig) + sigfp.close() + rfsig.close() + + def testRdiffDeltaPatch(self): + """Test making deltas and patching files""" + rplist = [self.basis, self.new, self.delta, + self.signature, self.output] + for rp in rplist: + if rp.lstat(): rp.delete() + + for i in range(2): + MakeRandomFile(self.basis.path) + MakeRandomFile(self.new.path) + map(RPath.setdata, [self.basis, self.new]) + assert self.basis.lstat() and self.new.lstat() + self.signature.write_from_fileobj(Rdiff.get_signature(self.basis)) + assert self.signature.lstat()
+ self.delta.write_from_fileobj(Rdiff.get_delta(self.signature, + self.new)) + assert self.delta.lstat() + Rdiff.patch_action(self.basis, self.delta, self.output).execute() + assert RPath.cmp(self.new, self.output) + map(RPath.delete, rplist) + + def testWriteDelta(self): + """Test write delta feature of rdiff""" + rplist = [self.basis, self.new, self.delta, self.output] + MakeRandomFile(self.basis.path) + MakeRandomFile(self.new.path) + map(RPath.setdata, [self.basis, self.new]) + assert self.basis.lstat() and self.new.lstat() + + Rdiff.write_delta(self.basis, self.new, self.delta) + assert self.delta.lstat() + Rdiff.patch_action(self.basis, self.delta, self.output).execute() + assert RPath.cmp(self.new, self.output) + map(RPath.delete, rplist) + + def testRdiffRename(self): + """Rdiff replacing original file with patch outfile""" + rplist = [self.basis, self.new, self.delta, self.signature] + for rp in rplist: + if rp.lstat(): rp.delete() + + MakeRandomFile(self.basis.path) + MakeRandomFile(self.new.path) + map(RPath.setdata, [self.basis, self.new]) + assert self.basis.lstat() and self.new.lstat() + self.signature.write_from_fileobj(Rdiff.get_signature(self.basis)) + assert self.signature.lstat() + self.delta.write_from_fileobj(Rdiff.get_delta(self.signature, + self.new)) + assert self.delta.lstat() + Rdiff.patch_action(self.basis, self.delta).execute() + assert RPath.cmp(self.basis, self.new) + map(RPath.delete, rplist) + + def testCopy(self): + """Using rdiff to copy two files""" + rplist = [self.basis, self.new] + for rp in rplist: + if rp.lstat(): rp.delete() + + MakeRandomFile(self.basis.path) + MakeRandomFile(self.new.path) + map(RPath.setdata, rplist) + Rdiff.copy_action(self.basis, self.new).execute() + assert RPath.cmp(self.basis, self.new) + map(RPath.delete, rplist) + + def testPatchWithAttribs(self): + """Using rdiff to copy two files with attributes""" + rplist = [self.basis, self.new, self.delta] + for rp in rplist: + if rp.lstat(): rp.delete() + + MakeRandomFile(self.basis.path) + MakeRandomFile(self.new.path) + self.new.chmod(0401) + map(RPath.setdata, rplist) + Rdiff.write_delta(self.basis, self.new, self.delta) + RPath.copy_attribs(self.new, self.delta) + assert self.delta.getperms() == 0401 + + assert not self.basis == self.new + Rdiff.patch_with_attribs_action(self.basis, self.delta).execute() + if not self.basis == self.new: + print self.basis, self.new + assert 0 + map(RPath.delete, rplist) + + +if __name__ == '__main__': + unittest.main() diff --git a/rdiff-backup/testing/regressiontest.py b/rdiff-backup/testing/regressiontest.py new file mode 100644 index 0000000..5d4d27e --- /dev/null +++ b/rdiff-backup/testing/regressiontest.py @@ -0,0 +1,410 @@ +import unittest, os + +execfile("commontest.py") +rbexec("setconnections.py") + + +"""Regression tests + +This one must be run in the rdiff-backup directory, as it requires +chdir-wrapper, the various rdiff-backup files, and the directory +testfiles +""" + +Globals.set('change_source_perms', 1) +Globals.counter = 0 +Log.setverbosity(7) + +class Local: + """This is just a place to put increments relative to the local + connection""" + def get_local_rp(extension): + return RPath(Globals.local_connection, "testfiles/" + extension) + + inc1rp = get_local_rp('increment1') + inc2rp = get_local_rp('increment2') + inc3rp = get_local_rp('increment3') + inc4rp = get_local_rp('increment4') + + rpout = get_local_rp('output') + rpout_inc = get_local_rp('output_inc') + rpout1 = get_local_rp('restoretarget1') + rpout2 =
+    rpout3 = get_local_rp('restoretarget3')
+    rpout4 = get_local_rp('restoretarget4')
+
+    noperms = get_local_rp('noperms')
+    noperms_out = get_local_rp('noperms_output')
+
+    rootfiles = get_local_rp('root')
+    rootfiles2 = get_local_rp('root2')
+    rootfiles21 = get_local_rp('root2.1')
+    rootfiles_out = get_local_rp('root_output')
+    rootfiles_out2 = get_local_rp('root_output2')
+
+    prefix = get_local_rp('.')
+
+
+class PathSetter(unittest.TestCase):
+    def get_prefix_and_conn(self, path, return_path):
+        """Return (prefix, connection) tuple"""
+        if path:
+            return (return_path,
+                    SetConnections.init_connection("python ./chdir-wrapper "+path))
+        else: return ('./', Globals.local_connection)
+
+    def get_src_rp(self, path):
+        return RPath(self.src_conn, self.src_prefix + path)
+
+    def get_dest_rp(self, path):
+        return RPath(self.dest_conn, self.dest_prefix + path)
+
+    def set_rbdir(self, rpout):
+        """Create rdiff-backup-data dir if not already, tell everyone"""
+        self.rbdir = self.rpout.append('rdiff-backup-data')
+        self.rpout.mkdir()
+        self.rbdir.mkdir()
+        SetConnections.UpdateGlobal('rbdir', self.rbdir)
+
+        # Better safe than sorry - cover all possibilities
+        Globals.add_regexp("testfiles/output/rdiff-backup-data", 1)
+        Globals.add_regexp("./testfiles/output/rdiff-backup-data", 1)
+        Globals.add_regexp("../testfiles/output/rdiff-backup-data", 1)
+        Globals.add_regexp("../../testfiles/output/rdiff-backup-data", 1)
+
+    def setPathnames(self, src_path, src_return, dest_path, dest_return):
+        """Start servers which will run in src_path and dest_path respectively
+
+        If either is None, then no server will be run and the local
+        process will handle that end.  src_return and dest_return are
+        the prefixes back to the original rdiff-backup directory.  So,
+        for instance, if src_path is "test2/tmp", then src_return will
+        be '../../'.
+
+        """
+        # Clear old data that may rely on deleted connections
+        Globals.isbackup_writer = None
+        Globals.isbackup_reader = None
+        Globals.rbdir = None
+
+        print "Setting up connection"
+        self.src_prefix, self.src_conn = \
+            self.get_prefix_and_conn(src_path, src_return)
+        self.dest_prefix, self.dest_conn = \
+            self.get_prefix_and_conn(dest_path, dest_return)
+        SetConnections.BackupInitConnections(self.src_conn, self.dest_conn)
+
+        os.system(MiscDir+"/myrm testfiles/output* testfiles/restoretarget* "
+                  "testfiles/noperms_output testfiles/root_output "
+                  "testfiles/unreadable_out")
+        self.inc1rp = self.get_src_rp("testfiles/increment1")
+        self.inc2rp = self.get_src_rp('testfiles/increment2')
+        self.inc3rp = self.get_src_rp('testfiles/increment3')
+        self.inc4rp = self.get_src_rp('testfiles/increment4')
+
+        self.rpout_inc = self.get_dest_rp('testfiles/output_inc')
+        self.rpout1 = self.get_dest_rp('testfiles/restoretarget1')
+        self.rpout2 = self.get_dest_rp('testfiles/restoretarget2')
+        self.rpout3 = self.get_dest_rp('testfiles/restoretarget3')
+        self.rpout4 = self.get_dest_rp('testfiles/restoretarget4')
+
+        self.rpout = self.get_dest_rp('testfiles/output')
+        self.set_rbdir(self.rpout)
+
+        self.noperms = self.get_src_rp('testfiles/noperms')
+        self.noperms_out = self.get_dest_rp('testfiles/noperms_output')
+
+        self.rootfiles = self.get_src_rp('testfiles/root')
+        self.rootfiles_out = self.get_dest_rp('testfiles/root_output')
+        self.rootfiles2 = self.get_src_rp('testfiles/root2')
+        self.rootfiles21 = self.get_src_rp('testfiles/root2.1')
+        self.rootfiles_out2 = self.get_dest_rp('testfiles/root_output2')
+
+        self.one_unreadable = self.get_src_rp('testfiles/one_unreadable')
+        self.one_unreadable_out = self.get_dest_rp('testfiles/unreadable_out')
+
+    def tearDown(self):
+        print "Taking down connections"
+        SetConnections.CloseConnections()
+
+
+class IncrementTest(PathSetter):
+    def testLocalinc(self):
+        """Test incrementing, and then restoring, locally"""
+        self.setPathnames(None, None, None, None)
+        self.runtest()
+
+    def test_remote_src(self):
+        """Increment/Restore when source directory is remote"""
+        self.setPathnames('test1', '../', None, None)
+        self.runtest()
+
+    def test_remote_dest(self):
+        """Increment/Restore when target directory is remote"""
+        self.setPathnames(None, None, 'test2', '../')
+        self.runtest()
+
+    def test_remote_both(self):
+        """Increment/Restore when both directories are remote"""
+        self.setPathnames('test1', '../', 'test2/tmp', '../../')
+        self.runtest()
+
+    def OldtestRecoveryLocal(self):
+        """Test to see if rdiff-backup can continue with bad increment"""
+        os.system(MiscDir+'/myrm testfiles/recovery_out_backup')
+        self.setPathnames(None, None, None, None)
+        Time.setprevtime(1006136450)
+        Time.setcurtime()
+        Globals.add_regexp('.*rdiff-backup-data', 1)
+        os.system('cp -a testfiles/recovery_out testfiles/recovery_out_backup')
+        recovery_in = self.get_src_rp('testfiles/recovery')
+        recovery_out = self.get_dest_rp('testfiles/recovery_out_backup')
+        recovery_inc = self.get_dest_rp('testfiles/recovery_out_backup/'
+                                        'rdiff-backup-data/increments')
+        HighLevel.Mirror_and_increment(recovery_in, recovery_out,
+                                       recovery_inc)
+        # Should probably check the integrity of the increments here,
+        # but for now just require Mirror_and_increment to finish
+        # without failing
+
+    def OldtestRecoveryRemote(self):
+        """Test Recovery with both connections remote"""
+        os.system(MiscDir+'/myrm testfiles/recovery_out_backup')
+        self.setPathnames('test1', '../', 'test2/tmp', '../../')
+        Time.setprevtime(1006136450)
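+        # 1006136450 is a fixed epoch timestamp (November 2001) --
+        # presumably chosen to match the canned increments under
+        # testfiles/recovery_out, so prevtime has to agree with that
+        # data for the recovery run to line up.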
+        Time.setcurtime()
+        Globals.add_regexp('.*rdiff-backup-data', 1)
+        os.system('cp -a testfiles/recovery_out testfiles/recovery_out_backup')
+        recovery_in = self.get_src_rp('testfiles/recovery')
+        recovery_out = self.get_dest_rp('testfiles/recovery_out_backup')
+        recovery_inc = self.get_dest_rp('testfiles/recovery_out_backup/'
+                                        'rdiff-backup-data/increments')
+        HighLevel.Mirror_and_increment(recovery_in, recovery_out,
+                                       recovery_inc)
+        # Should probably check the integrity of the increments here,
+        # but for now just require Mirror_and_increment to finish
+        # without failing
+
+    def runtest(self):
+        """After setting connections, etc., run actual test using this"""
+        Time.setcurtime()
+        SaveState.init_filenames(1)
+        HighLevel.Mirror(self.inc1rp, self.rpout)
+        assert RPath.cmp_recursive(Local.inc1rp, Local.rpout)
+
+        Time.setcurtime()
+        Time.setprevtime(999500000)
+        HighLevel.Mirror_and_increment(self.inc2rp, self.rpout, self.rpout_inc)
+        assert RPath.cmp_recursive(Local.inc2rp, Local.rpout)
+
+        Time.setcurtime()
+        Time.setprevtime(999510000)
+        HighLevel.Mirror_and_increment(self.inc3rp, self.rpout, self.rpout_inc)
+        assert RPath.cmp_recursive(Local.inc3rp, Local.rpout)
+
+        Time.setcurtime()
+        Time.setprevtime(999520000)
+        HighLevel.Mirror_and_increment(self.inc4rp, self.rpout, self.rpout_inc)
+        assert RPath.cmp_recursive(Local.inc4rp, Local.rpout)
+
+
+        print "Restoring to self.inc4"
+        HighLevel.Restore(999530000, self.rpout, self.get_inctup(),
+                          self.rpout4)
+        assert RPath.cmp_recursive(Local.inc4rp, Local.rpout4)
+
+        print "Restoring to self.inc3"
+        HighLevel.Restore(999520000, self.rpout, self.get_inctup(),
+                          self.rpout3)
+        assert RPath.cmp_recursive(Local.inc3rp, Local.rpout3)
+
+        print "Restoring to self.inc2"
+        HighLevel.Restore(999510000, self.rpout, self.get_inctup(),
+                          self.rpout2)
+        assert RPath.cmp_recursive(Local.inc2rp, Local.rpout2)
+
+        print "Restoring to self.inc1"
+        HighLevel.Restore(999500000, self.rpout, self.get_inctup(),
+                          self.rpout1)
+        assert RPath.cmp_recursive(Local.inc1rp, Local.rpout1)
+
+    def get_inctup(self):
+        """Return inc tuples as expected by Restore.RestoreRecursive
+
+        Assumes output increment directory is
+        testfiles/output_inc._____.
+ + """ + filenames = filter(lambda x: x.startswith("output_inc."), + Local.prefix.listdir()) + rplist = map(lambda x: Local.prefix.append(x), filenames) + return IndexedTuple((), (Local.prefix.append("output_inc"), rplist)) + + +class MirrorTest(PathSetter): + """Test some mirroring functions""" + def testLocalMirror(self): + """Test Local mirroring""" + self.setPathnames(None, None, None, None) + self.runtest() + + def testPartialLocalMirror(self): + """Test updating an existing directory""" + self.setPathnames(None, None, None, None) + self.run_partial_test() + + def testRemoteMirror(self): + """Mirroring when destination is remote""" + self.setPathnames(None, None, 'test1', '../') + self.runtest() + + def testPartialRemoteMirror(self): + """Partial mirroring when destination is remote""" + self.setPathnames(None, None, 'test1', '../') + self.run_partial_test() + + def testSourceRemoteMirror(self): + """Mirroring when source is remote""" + self.setPathnames('test2', '../', None, None) + self.runtest() + + def testPartialSourceRemoteMirror(self): + """Partial Mirroring when source is remote""" + self.setPathnames('test2', '../', None, None) + self.run_partial_test() + + def testBothRemoteMirror(self): + """Mirroring when both directories are remote""" + self.setPathnames('test1', '../', 'test2/tmp', '../../') + self.runtest() + + def testPartialBothRemoteMirror(self): + """Partial mirroring when both directories are remote""" + self.setPathnames('test1', '../', 'test2/tmp', '../../') + self.run_partial_test() + + def testNopermsLocal(self): + "Test mirroring a directory that has no permissions" + self.setPathnames(None, None, None, None) + Time.setcurtime() + SaveState.init_filenames(None) + HighLevel.Mirror(self.noperms, self.noperms_out, None) + # Can't compare because we don't have the permissions to do it right + #assert RPath.cmp_recursive(Local.noperms, Local.noperms_out) + + def testNopermsRemote(self): + "No permissions mirroring (remote)" + self.setPathnames('test1', '../', 'test2/tmp', '../../') + Time.setcurtime() + SaveState.init_filenames(None) + HighLevel.Mirror(self.noperms, self.noperms_out, checkpoint=None) + #assert RPath.cmp_recursive(Local.noperms, Local.noperms_out) + + def testPermSkipLocal(self): + """Test to see if rdiff-backup will skip unreadable files""" + self.setPathnames(None, None, None, None) + Globals.change_source_perms = None + Time.setcurtime() + SaveState.init_filenames(None) + HighLevel.Mirror(self.one_unreadable, self.one_unreadable_out, checkpoint=None) + Globals.change_source_perms = 1 + HighLevel.Mirror(self.one_unreadable, self.one_unreadable_out) + # Could add test, but for now just make sure it doesn't exit + + def testPermSkipRemote(self): + """Test skip of unreadable files remote""" + self.setPathnames('test1', '../', 'test2/tmp', '../../') + Globals.change_source_perms = None + Time.setcurtime() + SaveState.init_filenames(None) + HighLevel.Mirror(self.one_unreadable, self.one_unreadable_out) + Globals.change_source_perms = 1 + HighLevel.Mirror(self.one_unreadable, self.one_unreadable_out) + # Could add test, but for now just make sure it doesn't exit + + def refresh(self, *rps): + for rp in rps: rp.setdata() + + def _testRootLocal(self): + """Test mirroring a directory with dev files and different owners""" + self.setPathnames(None, None, None, None) + Globals.change_ownership = 1 + self.refresh(self.rootfiles, self.rootfiles_out, + Local.rootfiles, Local.rootfiles_out) # add uid/gid info + HighLevel.Mirror(self.rootfiles, 
self.rootfiles_out)
+        assert RPath.cmp_recursive(Local.rootfiles, Local.rootfiles_out)
+        Globals.change_ownership = None
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # remove that info
+
+    def _testRootRemote(self):
+        """Mirroring root files both ends remote"""
+        self.setPathnames('test1', '../', 'test2/tmp', '../../')
+        for conn in Globals.connections:
+            conn.Globals.set('change_ownership', 1)
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # add uid/gid info
+        HighLevel.Mirror(self.rootfiles, self.rootfiles_out)
+        assert RPath.cmp_recursive(Local.rootfiles, Local.rootfiles_out)
+        for conn in Globals.connections:
+            conn.Globals.set('change_ownership', None)
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # remove that info
+
+    def testRoot2Local(self):
+        """Make sure we can backup a directory we don't own"""
+        self.setPathnames(None, None, None, None)
+        Globals.change_ownership = Globals.change_source_perms = None
+        self.refresh(self.rootfiles2, self.rootfiles_out2,
+                     Local.rootfiles2, Local.rootfiles_out2) # add uid/gid info
+        HighLevel.Mirror(self.rootfiles2, self.rootfiles_out2)
+        assert RPath.cmp_recursive(Local.rootfiles2, Local.rootfiles_out2)
+        self.refresh(self.rootfiles2, self.rootfiles_out2,
+                     Local.rootfiles2, Local.rootfiles_out2) # remove that info
+        HighLevel.Mirror(self.rootfiles21, self.rootfiles_out2)
+        assert RPath.cmp_recursive(Local.rootfiles21, Local.rootfiles_out2)
+        self.refresh(self.rootfiles21, self.rootfiles_out2,
+                     Local.rootfiles21, Local.rootfiles_out2) # remove that info
+        Globals.change_source_perms = 1
+
+    def deleteoutput(self):
+        os.system(MiscDir+"/myrm testfiles/output*")
+        self.rbdir = self.rpout.append('rdiff-backup-data')
+        self.rpout.mkdir()
+        self.rbdir.mkdir()
+        self.reset_rps()
+
+    def reset_rps(self):
+        """Use after external changes made, to update the rps"""
+        for rp in [self.rpout, Local.rpout,
+                   self.rpout_inc, Local.rpout_inc,
+                   self.rpout1, Local.rpout1,
+                   self.rpout2, Local.rpout2,
+                   self.rpout3, Local.rpout3,
+                   self.rpout4, Local.rpout4]:
+            rp.setdata()
+
+    def runtest(self):
+        Time.setcurtime()
+        SaveState.init_filenames(None)
+        assert self.rbdir.lstat()
+        HighLevel.Mirror(self.inc1rp, self.rpout)
+        assert RPath.cmp_recursive(Local.inc1rp, Local.rpout)
+
+        self.deleteoutput()
+
+        HighLevel.Mirror(self.inc2rp, self.rpout)
+        assert RPath.cmp_recursive(Local.inc2rp, Local.rpout)
+
+    def run_partial_test(self):
+        os.system("cp -a testfiles/increment3 testfiles/output")
+        self.reset_rps()
+
+        Time.setcurtime()
+        SaveState.init_filenames(None)
+        HighLevel.Mirror(self.inc1rp, self.rpout)
+        #RPath.copy_attribs(self.inc1rp, self.rpout)
+        assert RPath.cmp_recursive(Local.inc1rp, Local.rpout)
+
+        HighLevel.Mirror(self.inc2rp, self.rpout)
+        assert RPath.cmp_recursive(Local.inc2rp, Local.rpout)
+
+if __name__ == "__main__": unittest.main()
diff --git a/rdiff-backup/testing/restoretest.py b/rdiff-backup/testing/restoretest.py
new file mode 100644
index 0000000..7672e1a
--- /dev/null
+++ b/rdiff-backup/testing/restoretest.py
@@ -0,0 +1,47 @@
+import unittest
+
+execfile("commontest.py")
+rbexec("restore.py")
+
+
+lc = Globals.local_connection
+
+class RestoreTest(unittest.TestCase):
+    """Test Restore class"""
+    prefix = "testfiles/restoretest/"
+    def maketesttuples(self, basename):
+        """Make testing tuples from available files starting with prefix"""
+        dirlist = os.listdir(self.prefix)
+        baselist = filter(lambda f:
f.startswith(basename), dirlist) + rps = map(lambda f: RPath(lc, self.prefix+f), baselist) + incs = filter(lambda rp: rp.isincfile(), rps) + tuples = map(lambda rp: (rp, RPath(lc, "%s.%s" % + (rp.getincbase().path, + rp.getinctime()))), + incs) + return tuples, incs + + def restoreonefiletest(self, basename): + tuples, incs = self.maketesttuples(basename) + rpbase = RPath(lc, self.prefix + basename) + rptarget = RPath(lc, "testfiles/outfile") + + if rptarget.lstat(): rptarget.delete() + for pair in tuples: + print "Processing file " + pair[0].path + rest_time = Time.stringtotime(pair[0].getinctime()) + Restore.RestoreFile(rest_time, rpbase, incs, rptarget) + if not rptarget.lstat(): assert not pair[1].lstat() + else: + assert RPath.cmp(rptarget, pair[1]), \ + "%s %s" % (rptarget.path, pair[1].path) + assert RPath.cmp_attribs(rptarget, pair[1]), \ + "%s %s" % (rptarget.path, pair[1].path) + rptarget.delete() + + def testRestorefiles(self): + """Testing restoration of files one at a time""" + map(self.restoreonefiletest, ["ocaml", "mf"]) + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/rlisttest.py b/rdiff-backup/testing/rlisttest.py new file mode 100644 index 0000000..2fe258b --- /dev/null +++ b/rdiff-backup/testing/rlisttest.py @@ -0,0 +1,98 @@ +import unittest +execfile("commontest.py") +rbexec("rlist.py") + +class BasicObject: + """The simplest object that can be used with RList""" + def __init__(self, i): + self.index = i + self.data = "This is object # %d" % i + + def __eq__(self, other): + return self.index == other.index and self.data == other.data + +l1_pre = filter(lambda x: x != 342 and not x in [650, 651, 652] and + x != 911 and x != 987, + range(1, 1001)) +l2_pre = filter(lambda x: not x in [222, 223, 224, 225] and x != 950 + and x != 999 and x != 444, + range(1, 1001)) + +l1 = map(BasicObject, l1_pre) +l2 = map(BasicObject, l2_pre) +combined = map(BasicObject, range(1, 1001)) + +def lmaphelper2((x, i)): + """Return difference triple to say that index x only in list # i""" + if i == 1: return (BasicObject(x), None) + elif i == 2: return (None, BasicObject(x)) + else: assert 0, "Invalid parameter %s for i" % i + +difference1 = map(lmaphelper2, [(222, 1), (223, 1), (224, 1), (225, 1), + (342, 2), (444, 1), (650, 2), (651, 2), + (652, 2), (911, 2), (950, 1), (987, 2), + (999, 1)]) +difference2 = map(lambda (a, b): (b, a), difference1) + +def comparelists(l1, l2): + print len(l1), len(l2) + for i in range(len(l1)): + if l1[i] != l2[i]: print l1[i], l2[i] + print l1 + print l2 + + + +class RListTest(unittest.TestCase): + def setUp(self): + """Make signatures, deltas""" + self.l1_sig = RList.Signatures(l1) + self.l2_sig = RList.Signatures(l2) + self.l1_to_l2_diff = RList.Deltas(self.l1_sig, l2) + self.l2_to_l1_diff = RList.Deltas(self.l2_sig, l1) + +# for d in makedeltas(makesigs(l2ci(l1)), l2ci(l2)): +# print d.min, d.max +# print d.elemlist + + def testPatching(self): + """Test to make sure each list can be reconstructed from other""" + newlist = list(RList.Patch(l1, RList.Deltas(RList.Signatures(l1), + l2))) + assert l2 == newlist + newlist = list(RList.Patch(l2, RList.Deltas(RList.Signatures(l2), + l1))) + assert l1 == newlist + + def testDifference(self): + """Difference between lists correctly identified""" + diff = list(RList.Dissimilar(l1, RList.Deltas(RList.Signatures(l1), + l2))) + assert diff == difference1 + diff = list(RList.Dissimilar(l2, RList.Deltas(RList.Signatures(l2), + l1))) + assert diff == difference2 + + + +class 
CachingIterTest(unittest.TestCase): + """Test the Caching Iter object""" + def testNormalIter(self): + """Make sure it can act like a normal iterator""" + ci = CachingIter(iter(range(10))) + for i in range(10): assert i == ci.next() + self.assertRaises(StopIteration, ci.next) + + def testPushing(self): + """Pushing extra objects onto the iterator""" + ci = CachingIter(iter(range(10))) + ci.push(12) + ci.push(11) + assert ci.next() == 11 + assert ci.next() == 12 + assert ci.next() == 0 + ci.push(10) + assert ci.next() == 10 + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/robusttest.py b/rdiff-backup/testing/robusttest.py new file mode 100644 index 0000000..4f08e44 --- /dev/null +++ b/rdiff-backup/testing/robusttest.py @@ -0,0 +1,86 @@ +import os, unittest + +execfile("commontest.py") +rbexec("setconnections.py") + +class TestRobustAction(unittest.TestCase): + """Test some robust actions""" + def testCopyWithAttribs(self): + """Test copy with attribs action""" + rpin = RPath(Globals.local_connection, "./testfiles/robust/in") + fp = open("./testfiles/robust/in", "wb") + fp.write("hello there") + fp.close() + os.chmod("./testfiles/robust/in", 0604) + rpin.setdata() + assert rpin.isreg() and rpin.getperms() % 01000 == 0604 + + rpout = RPath(Globals.local_connection, "./testfiles/robust/out") + Robust.copy_with_attribs_action(rpin, rpout).execute() + if not rpout == rpin: + print rpout, rpin + assert 0 + + rpout.delete() + rpin.delete() + + +class TempFileTest(unittest.TestCase): + """Test creation and management of tempfiles""" + rp_base = RPath(Globals.local_connection, + "./testfiles/robust/testfile_base") + def testBasic(self): + """Make a temp file, write to it, and then delete it + + Also test tempfile accounting and file name prefixing. 
+ + """ + assert not TempFileManager._tempfiles + tf = TempFileManager.new(self.rp_base) + assert TempFileManager._tempfiles == [tf] + assert tf.dirsplit()[0] == "testfiles/robust", tf.dirsplit()[0] + assert not tf.lstat() + fp = tf.open("w") + fp.write("hello") + assert not fp.close() + fp = tf.open("r") + assert fp.read() == "hello" + assert not fp.close() + tf.delete() + assert not TempFileManager._tempfiles + + def testRename(self): + """Test renaming of tempfile""" + tf = TempFileManager.new(self.rp_base) + assert TempFileManager._tempfiles + tf.touch() + destination = RPath(Globals.local_connection, + "./testfiles/robust/testfile_dest") + tf.rename(destination) + assert not TempFileManager._tempfiles + assert destination.lstat() + destination.delete() + + +class SaveStateTest(unittest.TestCase): + """Test SaveState class""" + data_dir = RPath(Globals.local_connection, "testfiles/robust") + def testSymlinking(self): + """Test recording last file with symlink""" + last_rorp = RORPath(('usr', 'local', 'bin', 'ls')) + Globals.rbdir = self.data_dir + Time.setcurtime() + SetConnections.BackupInitConnections(Globals.local_connection, + Globals.local_connection) + SaveState.init_filenames(None) + SaveState.record_last_file_action(last_rorp).execute() + + sym_rp = RPath(Globals.local_connection, + "testfiles/robust/last-file-mirrored.%s.snapshot" % + Time.curtimestr) + assert sym_rp.issym() + assert sym_rp.readlink() == "increments/usr/local/bin/ls" + sym_rp.delete() + + +if __name__ == '__main__': unittest.main() diff --git a/rdiff-backup/testing/roottest.py b/rdiff-backup/testing/roottest.py new file mode 100644 index 0000000..18a0afc --- /dev/null +++ b/rdiff-backup/testing/roottest.py @@ -0,0 +1,165 @@ +execfile("../src/setconnections.py") +import unittest, os + +"""Root tests + +This is mainly a copy of regressiontest.py, but contains the two tests +that are meant to be run as root. 
+""" + +Globals.change_source_perms = 1 +Globals.counter = 0 +Log.setverbosity(4) + +class Local: + """This is just a place to put increments relative to the local + connection""" + def get_local_rp(extension): + return RPath(Globals.local_connection, "testfiles/" + extension) + + inc1rp = get_local_rp('increment1') + inc2rp = get_local_rp('increment2') + inc3rp = get_local_rp('increment3') + inc4rp = get_local_rp('increment4') + + rpout = get_local_rp('output') + rpout_inc = get_local_rp('output_inc') + rpout1 = get_local_rp('restoretarget1') + rpout2 = get_local_rp('restoretarget2') + rpout3 = get_local_rp('restoretarget3') + rpout4 = get_local_rp('restoretarget4') + + noperms = get_local_rp('noperms') + noperms_out = get_local_rp('noperms_output') + + rootfiles = get_local_rp('root') + rootfiles_out = get_local_rp('root_output') + + prefix = get_local_rp('.') + + +class PathSetter(unittest.TestCase): + def get_prefix_and_conn(self, path, return_path): + """Return (prefix, connection) tuple""" + if path: + return (return_path, + SetConnections.init_connection("./chdir-wrapper "+path)) + else: return ('./', Globals.local_connection) + + def get_src_rp(self, path): + return RPath(self.src_conn, self.src_prefix + path) + + def get_dest_rp(self, path): + return RPath(self.dest_conn, self.dest_prefix + path) + + + def set_rbdir(self, rpout): + """Create rdiff-backup-data dir if not already, tell everyone""" + self.rbdir = self.rpout.append('rdiff-backup-data') + self.rpout.mkdir() + self.rbdir.mkdir() + SetConnections.UpdateGlobal('rbdir', self.rbdir) + + # Better safe than sorry - cover all possibilities + Globals.add_regexp("testfiles/output/rdiff-backup-data", 1) + Globals.add_regexp("./testfiles/output/rdiff-backup-data", 1) + Globals.add_regexp("../testfiles/output/rdiff-backup-data", 1) + Globals.add_regexp("../../testfiles/output/rdiff-backup-data", 1) + + def setPathnames(self, src_path, src_return, dest_path, dest_return): + """Start servers which will run in src_path and dest_path respectively + + If either is None, then no server will be run and local + process will handle that end. src_return and dest_return are + the prefix back to the original rdiff-backup directory. So + for instance is src_path is "test2/tmp", then src_return will + be '../'. 
+
+        """
+        # Clear old data that may rely on deleted connections
+        Globals.isbackup_writer = None
+        Globals.isbackup_reader = None
+        Globals.rbdir = None
+
+        print "Setting up connection"
+        self.src_prefix, self.src_conn = \
+            self.get_prefix_and_conn(src_path, src_return)
+        self.dest_prefix, self.dest_conn = \
+            self.get_prefix_and_conn(dest_path, dest_return)
+        SetConnections.BackupInitConnections(self.src_conn, self.dest_conn)
+
+        os.system("./myrm testfiles/output* testfiles/restoretarget* "
+                  "testfiles/noperms_output testfiles/root_output "
+                  "testfiles/unreadable_out")
+        self.inc1rp = self.get_src_rp("testfiles/increment1")
+        self.inc2rp = self.get_src_rp('testfiles/increment2')
+        self.inc3rp = self.get_src_rp('testfiles/increment3')
+        self.inc4rp = self.get_src_rp('testfiles/increment4')
+
+        self.rpout_inc = self.get_dest_rp('testfiles/output_inc')
+        self.rpout1 = self.get_dest_rp('testfiles/restoretarget1')
+        self.rpout2 = self.get_dest_rp('testfiles/restoretarget2')
+        self.rpout3 = self.get_dest_rp('testfiles/restoretarget3')
+        self.rpout4 = self.get_dest_rp('testfiles/restoretarget4')
+
+        self.rpout = self.get_dest_rp('testfiles/output')
+        self.set_rbdir(self.rpout)
+
+        self.noperms = self.get_src_rp('testfiles/noperms')
+        self.noperms_out = self.get_dest_rp('testfiles/noperms_output')
+
+        self.rootfiles = self.get_src_rp('testfiles/root')
+        self.rootfiles_out = self.get_dest_rp('testfiles/root_output')
+
+        self.one_unreadable = self.get_src_rp('testfiles/one_unreadable')
+        self.one_unreadable_out = self.get_dest_rp('testfiles/unreadable_out')
+
+    def tearDown(self):
+        print "Taking down connections"
+        SetConnections.CloseConnections()
+        os.system("./myrm testfiles/output* testfiles/restoretarget* "
+                  "testfiles/noperms_output testfiles/root_output "
+                  "testfiles/unreadable_out")
+
+
+class MirrorTest(PathSetter):
+    """Test some mirroring functions"""
+    def refresh(self, *rps):
+        for rp in rps: rp.setdata()
+
+    def testRootLocal(self):
+        """Test mirroring a directory with dev files and different owners"""
+        self.setPathnames(None, None, None, None)
+        Time.setcurtime()
+        SaveState.init_filenames(None)
+        Globals.change_ownership = 1
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # add uid/gid info
+        HighLevel.Mirror(self.rootfiles, self.rootfiles_out)
+        assert RPath.cmp_recursive(Local.rootfiles, Local.rootfiles_out)
+        Globals.change_ownership = None
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # remove that info
+
+    def testRootRemote(self):
+        """Mirroring root files both ends remote"""
+        self.setPathnames('test1', '../', 'test2/tmp', '../../')
+        Time.setcurtime()
+        SaveState.init_filenames(None)
+        for conn in Globals.connections:
+            conn.Globals.set('change_ownership', 1)
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # add uid/gid info
+        HighLevel.Mirror(self.rootfiles, self.rootfiles_out)
+        assert RPath.cmp_recursive(Local.rootfiles, Local.rootfiles_out)
+        for conn in Globals.connections:
+            conn.Globals.set('change_ownership', None)
+        self.refresh(self.rootfiles, self.rootfiles_out,
+                     Local.rootfiles, Local.rootfiles_out) # remove that info
+
+    def deleteoutput(self):
+        os.system("./myrm testfiles/output*")
+        self.reset_rps()
+
+
+if __name__ == "__main__": unittest.main()
diff --git a/rdiff-backup/testing/rorpitertest.py b/rdiff-backup/testing/rorpitertest.py
new file mode 100644
index 0000000..b9d558f
--- /dev/null
+++ 
b/rdiff-backup/testing/rorpitertest.py @@ -0,0 +1,105 @@ +import unittest +execfile("commontest.py") +rbexec("highlevel.py") + + +#Log.setverbosity(8) + +class index: + """This is just used below to test the iter tree reducer""" + def __init__(self, index): + self.index = index + + +class RORPIterTest(unittest.TestCase): + def setUp(self): + self.lc = Globals.local_connection + self.inc0rp = RPath(self.lc, "testfiles/empty", ()) + self.inc1rp = RPath(self.lc, "testfiles/inc-reg-perms1", ()) + self.inc2rp = RPath(self.lc, "testfiles/inc-reg-perms2", ()) + self.output = RPath(self.lc, "testfiles/output", ()) + + def testCollateIterators(self): + """Test basic collating""" + indicies = map(index, [0,1,2,3]) + helper = lambda i: indicies[i] + + makeiter1 = lambda: iter(indicies) + makeiter2 = lambda: iter(map(helper, [0,1,3])) + makeiter3 = lambda: iter(map(helper, [1,2])) + + outiter = RORPIter.CollateIterators(makeiter1(), makeiter2()) + assert Iter.equal(outiter, + iter([(indicies[0], indicies[0]), + (indicies[1], indicies[1]), + (indicies[2], None), + (indicies[3], indicies[3])])) + + assert Iter.equal(RORPIter.CollateIterators(makeiter1(), + makeiter2(), + makeiter3()), + iter([(indicies[0], indicies[0], None), + (indicies[1], indicies[1], indicies[1]), + (indicies[2], None, indicies[2]), + (indicies[3], indicies[3], None)])) + + assert Iter.equal(RORPIter.CollateIterators(makeiter1(), iter([])), + iter(map(lambda i: (i, None), + indicies))) + assert Iter.equal(iter(map(lambda i: (i, None), indicies)), + RORPIter.CollateIterators(makeiter1(), iter([]))) + + + def testCombinedPatching(self): + """Combined signature, patch, and diff operations""" + if self.output.lstat(): self.output.delete() + + def turninto(final_rp): + sigfile = RORPIter.ToFile(RORPIter.GetSignatureIter(self.output)) + diff_file = RORPIter.ToFile( + RORPIter.GetDiffIter(RORPIter.FromFile(sigfile), + RORPIter.IterateRPaths(final_rp))) + RORPIter.PatchIter(self.output, RORPIter.FromFile(diff_file)) + + turninto(self.inc1rp) + assert self.compare_rps(self.output, self.inc1rp) + turninto(self.inc2rp) + assert self.compare_rps(self.output, self.inc2rp) + + def compare_rps(self, rp1, rp2): + """True if rp1 and rp2 are equal in some sense""" + def RawIter(rp): + """Get raw iterator of file stats based an rp1""" + return RORPIter.ToRaw(Iter.map(lambda rp2: rp2.getRORPath(), + RORPIter.IterateRPaths(rp))) + ri1 = RawIter(rp1) + ri2 = RawIter(rp2) + try: + rorp1 = ri1.next() + rorp2 = ri2.next() + assert rorp1 == rorp2, "%s %s" % (rorp1, rorp2) + except StopIteration: pass + return 1 + # return Iter.equal(RawIter(rp1), RawIter(rp2)) + + +class IndexedTupleTest(unittest.TestCase): + def testTuple(self): + """Test indexed tuple""" + i = IndexedTuple((1,2,3), ("a", "b")) + i2 = IndexedTuple((), ("hello", "there", "how are you")) + + assert i[0] == "a" + assert i[1] == "b" + assert i2[1] == "there" + assert len(i) == 2 and len(i2) == 3 + assert i2 < i, i2 < i + + def testTupleAssignment(self): + a, b, c = IndexedTuple((), (1, 2, 3)) + assert a == 1 + assert b == 2 + assert c == 3 + +if __name__ == "__main__": unittest.main() + diff --git a/rdiff-backup/testing/rpathtest.py b/rdiff-backup/testing/rpathtest.py new file mode 100644 index 0000000..0b28f14 --- /dev/null +++ b/rdiff-backup/testing/rpathtest.py @@ -0,0 +1,337 @@ +import os, cPickle, sys, unittest +execfile("commontest.py") +rbexec("highlevel.py") + + + +class RPathTest(unittest.TestCase): + lc = Globals.local_connection + prefix = "testfiles/various_file_types/" + mainprefix 
= "testfiles/" + rp_prefix = RPath(lc, prefix, ()) + rp_main = RPath(lc, mainprefix, ()) + + +class RORPStateTest(RPathTest): + """Test Pickling of RORPaths""" + def testPickle(self): + rorp = RPath(self.lc, self.prefix, ("regular_file",)).getRORPath() + rorp.file = sys.stdin # try to confuse pickler + assert rorp.isreg() + rorp2 = cPickle.loads(cPickle.dumps(rorp, 1)) + assert rorp2.isreg() + assert rorp2.data == rorp.data and rorp.index == rorp2.index + + +class CheckTypes(RPathTest): + """Check to see if file types are identified correctly""" + def testExist(self): + """Can tell if files exist""" + assert RPath(self.lc, self.prefix, ()).lstat() + assert not RPath(self.lc, "asuthasetuouo", ()).lstat() + + def testDir(self): + """Directories identified correctly""" + assert RPath(self.lc, self.prefix, ()).isdir() + assert not RPath(self.lc, self.prefix, ("regular_file",)).isdir() + + def testSym(self): + """Symbolic links identified""" + assert RPath(self.lc, self.prefix, ("symbolic_link",)).issym() + assert not RPath(self.lc, self.prefix, ()).issym() + + def testReg(self): + """Regular files identified""" + assert RPath(self.lc, self.prefix, ("regular_file",)).isreg() + assert not RPath(self.lc, self.prefix, ("symbolic_link",)).isreg() + + def testFifo(self): + """Fifo's identified""" + assert RPath(self.lc, self.prefix, ("fifo",)).isfifo() + assert not RPath(self.lc, self.prefix, ()).isfifo() + + def testCharDev(self): + """Char special files identified""" + assert RPath(self.lc, "/dev/tty2", ()).ischardev() + assert not RPath(self.lc, self.prefix, ("regular_file",)).ischardev() + + def testBlockDev(self): + """Block special files identified""" + assert RPath(self.lc, "/dev/hda", ()).isblkdev() + assert not RPath(self.lc, self.prefix, ("regular_file",)).isblkdev() + + +class CheckPerms(RPathTest): + """Check to see if permissions are reported and set accurately""" + def testExecReport(self): + """Check permissions for executable files""" + assert self.rp_prefix.append('executable').getperms() == 0755 + assert self.rp_prefix.append('executable2').getperms() == 0700 + + def testOrdinaryReport(self): + """Ordinary file permissions...""" + assert self.rp_prefix.append("regular_file").getperms() == 0644 + assert self.rp_prefix.append('two_hardlinked_files1').getperms() == 0640 + + def testChmod(self): + """Test changing file permission""" + rp = self.rp_prefix.append("changeable_permission") + rp.chmod(0700) + assert rp.getperms() == 0700 + rp.chmod(0644) + assert rp.getperms() == 0644 + + def testExceptions(self): + """What happens when file absent""" + self.assertRaises(Exception, + RPath(self.lc, self.prefix, ("aoeunto",)).getperms) + + +class CheckDir(RPathTest): + """Check directory related functions""" + def testCreation(self): + """Test directory creation and deletion""" + d = self.rp_prefix.append("tempdir") + assert not d.lstat() + d.mkdir() + assert d.isdir() + d.rmdir() + assert not d.lstat() + + def testExceptions(self): + """Should raise os.errors when no files""" + d = RPath(self.lc, self.prefix, ("suthosutho",)) + self.assertRaises(os.error, d.rmdir) + d.mkdir() + self.assertRaises(os.error, d.mkdir) + d.rmdir() + + def testListdir(self): + """Checking dir listings""" + assert (RPath(self.lc, self.mainprefix, ("sampledir",)).listdir() == + ["1", "2", "3", "4"]) + + +class CheckSyms(RPathTest): + """Check symlinking and reading""" + def testRead(self): + """symlink read""" + assert (RPath(self.lc, self.prefix, ("symbolic_link",)).readlink() == + "regular_file") + + def 
testMake(self): + """Creating symlink""" + link = RPath(self.lc, self.mainprefix, ("symlink",)) + assert not link.lstat() + link.symlink("abcdefg") + assert link.issym() + assert link.readlink() == "abcdefg" + link.delete() + + +class TouchDelete(RPathTest): + """Check touching and deletion of files""" + def testTouch(self): + """Creation of 0 length files""" + t = RPath(self.lc, self.mainprefix, ("testtouch",)) + assert not t.lstat() + t.touch() + assert t.lstat() + t.delete() + + def testDelete(self): + """Deletion of files""" + d = RPath(self.lc, self.mainprefix, ("testdelete",)) + d.touch() + assert d.lstat() + d.delete() + assert not d.lstat() + + +class MiscFileInfo(RPathTest): + """Check Miscellaneous file information""" + def testFileLength(self): + """File length = getsize()""" + assert (RPath(self.lc, self.prefix, ("regular_file",)).getsize() == + 75650) + + +class FilenameOps(RPathTest): + """Check filename operations""" + weirdfilename = eval('\'\\xd8\\xab\\xb1Wb\\xae\\xc5]\\x8a\\xbb\\x15v*\\xf4\\x0f!\\xf9>\\xe2Y\\x86\\xbb\\xab\\xdbp\\xb0\\x84\\x13k\\x1d\\xc2\\xf1\\xf5e\\xa5U\\x82\\x9aUV\\xa0\\xf4\\xdf4\\xba\\xfdX\\x03\\x82\\x07s\\xce\\x9e\\x8b\\xb34\\x04\\x9f\\x17 \\xf4\\x8f\\xa6\\xfa\\x97\\xab\\xd8\\xac\\xda\\x85\\xdcKvC\\xfa#\\x94\\x92\\x9e\\xc9\\xb7\\xc3_\\x0f\\x84g\\x9aB\\x11<=^\\xdbM\\x13\\x96c\\x8b\\xa7|*"\\\\\\\'^$@#!(){}?+ ~` \'') + normdict = {"/": "/", + ".": ".", + "//": "/", + "/a/b": "/a/b", + "a/b": "a/b", + "a//b": "a/b", + "a////b//c": "a/b/c", + "..": "..", + "a/": "a", + "/a//b///": "/a/b"} + dirsplitdict = {"/": ("", ""), + "/a": ("", "a"), + "/a/b": ("/a", "b"), + ".": (".", "."), + "b/c": ("b", "c"), + "a": (".", "a")} + + def testQuote(self): + """See if filename quoting works""" + wtf = RPath(self.lc, self.prefix, (self.weirdfilename,)) + reg = RPath(self.lc, self.prefix, ("regular_file",)) + assert wtf.lstat() + assert reg.lstat() + assert not os.system("ls %s >/dev/null 2>&1" % wtf.quote()) + assert not os.system("ls %s >/dev/null 2>&1" % reg.quote()) + + def testNormalize(self): + """rpath.normalize() dictionary test""" + for (before, after) in self.normdict.items(): + assert RPath(self.lc, before, ()).normalize().path == after, \ + "Normalize fails for %s => %s" % (before, after) + + def testDirsplit(self): + """Test splitting of various directories""" + for full, split in self.dirsplitdict.items(): + result = RPath(self.lc, full, ()).dirsplit() + assert result == split, \ + "%s => %s instead of %s" % (full, result, split) + + def testGetnums(self): + """Test getting file numbers""" + devnums = RPath(self.lc, "/dev/hda", ()).getdevnums() + assert devnums == (3, 0), devnums + devnums = RPath(self.lc, "/dev/tty2", ()).getdevnums() + assert devnums == (4, 2), devnums + + +class FileIO(RPathTest): + """Test file input and output""" + def testRead(self): + """File reading""" + fp = RPath(self.lc, self.prefix, ("executable",)).open("r") + assert fp.read(6) == "#!/bin" + fp.close() + + def testWrite(self): + """File writing""" + rp = RPath(self.lc, self.mainprefix, ("testfile",)) + fp = rp.open("w") + fp.write("hello") + fp.close() + fp_input = rp.open("r") + assert fp_input.read() == "hello" + fp_input.close() + rp.delete() + + +class FileCopying(RPathTest): + """Test file copying and comparison""" + def setUp(self): + self.hl1 = RPath(self.lc, self.prefix, ("two_hardlinked_files1",)) + self.hl2 = RPath(self.lc, self.prefix, ("two_hardlinked_files2",)) + self.sl = RPath(self.lc, self.prefix, ("symbolic_link",)) + self.dir = RPath(self.lc, self.prefix, ()) 
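+        # Fixture notes: the two "two_hardlinked_files" entries share an
+        # inode, symbolic_link points at regular_file, dir is the fixture
+        # directory itself, and dest is scratch space recreated by every
+        # test (deleted below if left over from a previous run).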
+ self.fifo = RPath(self.lc, self.prefix, ("fifo",)) + self.rf = RPath(self.lc, self.prefix, ("regular_file",)) + self.dest = RPath(self.lc, self.mainprefix, ("dest",)) + if self.dest.lstat(): self.dest.delete() + assert not self.dest.lstat() + + def testComp(self): + """Test comparisons involving regular files""" + assert RPath.cmp(self.hl1, self.hl2) + assert not RPath.cmp(self.rf, self.hl1) + assert not RPath.cmp(self.dir, self.rf) + + def testCompMisc(self): + """Test miscellaneous comparisons""" + assert RPath.cmp(self.dir, RPath(self.lc, self.mainprefix, ())) + self.dest.symlink("regular_file") + assert RPath.cmp(self.sl, self.dest) + self.dest.delete() + assert not RPath.cmp(self.sl, self.fifo) + assert not RPath.cmp(self.dir, self.sl) + + def testCopy(self): + """Test copy of various files""" + for rp in [self.sl, self.rf, self.fifo, self.dir]: + RPath.copy(rp, self.dest) + assert self.dest.lstat(), "%s doesn't exist" % self.dest.path + assert RPath.cmp(rp, self.dest) + assert RPath.cmp(self.dest, rp) + self.dest.delete() + + +class FileAttributes(FileCopying): + """Test file attribute operations""" + def setUp(self): + FileCopying.setUp(self) + self.noperms = RPath(self.lc, self.mainprefix, ("noperms",)) + self.nowrite = RPath(self.lc, self.mainprefix, ("nowrite",)) + self.exec1 = RPath(self.lc, self.prefix, ("executable",)) + self.exec2 = RPath(self.lc, self.prefix, ("executable2",)) + self.test = RPath(self.lc, self.prefix, ("test",)) + self.nothing = RPath(self.lc, self.prefix, ("aoeunthoenuouo",)) + self.sym = RPath(self.lc, self.prefix, ("symbolic_link",)) + + def testComp(self): + """Test attribute comparison success""" + testpairs = [(self.hl1, self.hl2)] + for a, b in testpairs: + assert RPath.cmp_attribs(a, b), "Err with %s %s" % (a.path, b.path) + assert RPath.cmp_attribs(b, a), "Err with %s %s" % (b.path, a.path) + + def testCompFail(self): + """Test attribute comparison failures""" + testpairs = [(self.nowrite, self.noperms), + (self.exec1, self.exec2), + (self.rf, self.hl1)] + for a, b in testpairs: + assert not RPath.cmp_attribs(a, b), \ + "Err with %s %s" % (a.path, b.path) + assert not RPath.cmp_attribs(b, a), \ + "Err with %s %s" % (b.path, a.path) + + def testCompRaise(self): + """Should raise exception when file missing""" + self.assertRaises(RPathException, RPath.cmp_attribs, + self.nothing, self.hl1) + self.assertRaises(RPathException, RPath.cmp_attribs, + self.noperms, self.nothing) + + def testCopyAttribs(self): + """Test copying attributes""" + t = RPath(self.lc, self.mainprefix, ("testattribs",)) + if t.lstat(): t.delete() + for rp in [self.noperms, self.nowrite, self.rf, self.exec1, + self.exec2, self.hl1, self.dir]: + t.touch() + RPath.copy_attribs(rp, t) + assert RPath.cmp_attribs(t, rp), \ + "Attributes for file %s not copied successfully" % rp.path + t.delete() + + def testCopyWithAttribs(self): + """Test copying with attribs (bug found earlier)""" + out = RPath(self.lc, self.mainprefix, ("out",)) + if out.lstat(): out.delete() + for rp in [self.noperms, self.nowrite, self.rf, self.exec1, + self.exec2, self.hl1, self.dir, self.sym]: + RPath.copy_with_attribs(rp, out) + assert RPath.cmp(rp, out) + assert RPath.cmp_attribs(rp, out) + out.delete() + + def testCopyRaise(self): + """Should raise exception for non-existent files""" + self.assertRaises(RPathException, RPath.copy_attribs, + self.hl1, self.nothing) + self.assertRaises(RPathException, RPath.copy_attribs, + self.nothing, self.nowrite) + + + +if __name__ == "__main__": + unittest.main() diff 
--git a/rdiff-backup/testing/server.py b/rdiff-backup/testing/server.py new file mode 100755 index 0000000..b30c745 --- /dev/null +++ b/rdiff-backup/testing/server.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +import sys +execfile("commontest.py") +rbexec("setconnections.py") + +def Test_SetConnGlobals(conn, name, val): + """Used in unittesting - set one of specified connection's Global vars""" + conn.Globals.set(name, val) + +Log.setverbosity(9) +PipeConnection(sys.stdin, sys.stdout).Server() diff --git a/rdiff-backup/testing/setconnectionstest.py b/rdiff-backup/testing/setconnectionstest.py new file mode 100644 index 0000000..d5d2671 --- /dev/null +++ b/rdiff-backup/testing/setconnectionstest.py @@ -0,0 +1,26 @@ +import unittest +execfile("commontest.py") +rbexec("setconnections.py") + +class SetConnectionsTest(unittest.TestCase): + """Set SetConnections Class""" + def testParsing(self): + """Test parsing of various file descriptors""" + pfd = SetConnections.parse_file_desc + assert pfd("bescoto@folly.stanford.edu::/usr/bin/ls") == \ + ("bescoto@folly.stanford.edu", "/usr/bin/ls") + assert pfd("hello there::/goodbye:euoeu") == \ + ("hello there", "/goodbye:euoeu") + assert pfd(r"test\\ing\::more::and more\\..") == \ + (r"test\ing::more", r"and more\\.."), \ + pfd(r"test\\ing\::more::and more\\..") + assert pfd("a:b:c:d::e") == ("a:b:c:d", "e") + assert pfd("foobar") == (None, "foobar") + assert pfd(r"hello\::there") == (None, "hello\::there") + + self.assertRaises(SetConnectionsException, pfd, r"hello\:there::") + self.assertRaises(SetConnectionsException, pfd, "foobar\\") + + + +if __name__ == "__main__": unittest.main() diff --git a/rdiff-backup/testing/statictest.py b/rdiff-backup/testing/statictest.py new file mode 100644 index 0000000..a9ff812 --- /dev/null +++ b/rdiff-backup/testing/statictest.py @@ -0,0 +1,63 @@ +import unittest, types +execfile("commontest.py") +rbexec("static.py") + + +class D: + def foo(x, y): + return x, y + def bar(self, x): + return 3, x + def _hello(self): + return self + +MakeStatic(D) + + +class C: + _a = 0 + def get(cls): + return cls._a + def inc(cls): + cls._a = cls._a + 1 + +MakeClass(C) + + +class StaticMethodsTest(unittest.TestCase): + """Test StaticMethods module""" + def testType(self): + """Methods should have type StaticMethod""" + assert type(D.foo) is types.FunctionType + assert type(D.bar) is types.FunctionType + + def testStatic(self): + """Methods should be callable without instance""" + assert D.foo(1,2) == (1,2) + assert D.bar(3,4) == (3,4) + + def testBound(self): + """Methods should also work bound""" + d = D() + assert d.foo(1,2) == (1,2) + assert d.bar(3,4) == (3,4) + + def testStatic_(self): + """_ Methods should be untouched""" + d = D() + self.assertRaises(TypeError, d._hello, 4) + assert d._hello() is d + + +class ClassMethodsTest(unittest.TestCase): + def test(self): + """Test MakeClass function""" + assert C.get() == 0 + C.inc() + assert C.get() == 1 + C.inc() + assert C.get() == 2 + + +if __name__ == "__main__": + unittest.main() diff --git a/rdiff-backup/testing/testall.py b/rdiff-backup/testing/testall.py new file mode 100644 index 0000000..5389408 --- /dev/null +++ b/rdiff-backup/testing/testall.py @@ -0,0 +1,26 @@ +import unittest + +"""This probably doesn't work any more - just run the tests manually.""" + +from connectiontest import * +#from destructive-steppingtest import * +from dstest import * +from highleveltest import * +from incrementtest import * +from iterfiletest import * +from lazytest import * +from rdifftest 
import * +from regressiontest import * +from restoretest import * +from rlisttest import * +from rorpitertest import * +from rpathtest import * +#from finaltest import * +from statictest import * +from timetest import * +from filelisttest import * +from setconnectionstest import * + +if __name__ == "__main__": + unittest.main() + diff --git a/rdiff-backup/testing/timetest.py b/rdiff-backup/testing/timetest.py new file mode 100644 index 0000000..f7a6fcd --- /dev/null +++ b/rdiff-backup/testing/timetest.py @@ -0,0 +1,71 @@ +import unittest +execfile("commontest.py") +rbexec("highlevel.py") + +class TimeTest(unittest.TestCase): + def testConversion(self): + """test timetostring and stringtotime""" + Time.setcurtime() + assert type(Time.curtime) is types.FloatType + assert type(Time.curtimestr) is types.StringType + assert (Time.cmp(int(Time.curtime), Time.curtimestr) == 0 or + Time.cmp(int(Time.curtime) + 1, Time.curtimestr) == 0) + time.sleep(1.05) + assert Time.cmp(time.time(), Time.curtime) == 1 + assert Time.cmp(Time.timetostring(time.time()), Time.curtimestr) == 1 + + def testConversion_separator(self): + """Same as testConversion, but change time Separator""" + Globals.time_separator = "_" + self.testConversion() + Globals.time_separator = ":" + + def testCmp(self): + """Test time comparisons""" + cmp = Time.cmp + assert cmp(1,2) == -1 + assert cmp(2,2) == 0 + assert cmp(5,1) == 1 + assert cmp("2001-09-01T21:49:04Z", "2001-08-01T21:49:04Z") == 1 + assert cmp("2001-09-01T04:49:04+03:23", "2001-09-01T21:49:04Z") == -1 + assert cmp("2001-09-01T12:00:00Z", "2001-09-01T04:00:00-08:00") == 0 + assert cmp("2001-09-01T12:00:00-08:00", + "2001-09-01T12:00:00-07:00") == 1 + + def testCmp_separator(self): + """Like testCmp but with new separator""" + Globals.time_separator = "_" + cmp = Time.cmp + assert cmp(1,2) == -1 + assert cmp(2,2) == 0 + assert cmp(5,1) == 1 + assert cmp("2001-09-01T21_49_04Z", "2001-08-01T21_49_04Z") == 1 + assert cmp("2001-09-01T04_49_04+03_23", "2001-09-01T21_49_04Z") == -1 + assert cmp("2001-09-01T12_00_00Z", "2001-09-01T04_00_00-08_00") == 0 + assert cmp("2001-09-01T12_00_00-08_00", + "2001-09-01T12_00_00-07_00") == 1 + Globals.time_separator = ":" + + def testStringtotime(self): + """Test converting string to time""" + timesec = int(time.time()) + assert timesec == int(Time.stringtotime(Time.timetostring(timesec))) + assert not Time.stringtotime("2001-18-83T03:03:03Z") + assert not Time.stringtotime("2001-01-23L03:03:03L") + assert not Time.stringtotime("2001_01_23T03:03:03Z") + + def testIntervals(self): + """Test converting strings to intervals""" + i2s = Time.intstringtoseconds + for s in ["32", "", "d", "231I", "MM", "s", "-2h"]: + try: i2s(s) + except TimeException: pass + else: assert 0, s + assert i2s("7D") == 7*86400 + assert i2s("232s") == 232 + assert i2s("2M") == 2*30*86400 + assert i2s("400m") == 400*60 + assert i2s("1Y") == 365*86400 + assert i2s("30h") == 30*60*60 + +if __name__ == '__main__': unittest.main() -- cgit v1.2.1