diff options
Diffstat (limited to 'contrib/validate_repo.in')
-rw-r--r-- | contrib/validate_repo.in | 856 |
1 files changed, 856 insertions, 0 deletions
diff --git a/contrib/validate_repo.in b/contrib/validate_repo.in new file mode 100644 index 0000000..6fdbead --- /dev/null +++ b/contrib/validate_repo.in @@ -0,0 +1,856 @@ +#! @PERL@ -w +######################################################################## +# +# Copyright (c) 2000, 2001 by Donald Sharp <sharpd@cisco.com> +# All Rights Reserved +# +# Some portions Copyright (c) 2002, 2003 by +# Derek R. Price <mailto:derek@ximbiot.com> +# & Ximbiot <http://ximbiot.com>. +# All rights reserved. +# +# Permission is granted to copy and/or distribute this file, with or +# without modifications, provided this notice is preserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +######################################################################## + +=head1 validate_repo.pl + +Script to check the integrity of the Repository. + +=head1 SYNOPSIS + + perldoc validate_repo.pl + validate_repo.pl --help [--verbose!] + validate_repo.pl [--verbose!] [--cvsroot=CVSROOT] [--exec=SCRIPT]... + [--all-revisions!] [module]... + +=head1 DESCRIPTION + +This script will search through a repository and determine if any of the +files in it are corrupted. + +This is normally accomplished by checking out all I<important> revisions, where +I<important> revisions are defined as the smallest set which, when checked out, +will cause each and every revision's integrity to be verified. This resolves +to the most recent revision on each branch and the first and last revisions on +the trunk. + +Please do not run this script inside of the repository itself. This will cause +it too fail. + +=head1 OPTIONS + +=over + +=item C<--help> + +Print this very help text (or, with C<--verbose>, act like +C<perldoc validate_repo.pl>). + +=item C<-a> or C<--all-revisions> + +Check out each and every revision rather than just the I<important> ones. +This flag is useful with C<--exec> to execute the C<SCRIPT> (from C<--exec> +below) on a checked out copy of each and every revision. + +=item C<-d> or C<--cvsroot=CVSROOT> + +Use repository specified by C<CVSROOT>. Defaults to the contents of the +F<./CVS/Root> file when it exists and is readable, then to the contents of the +C<$CVSROOT> environment variable when it is set and non-empty. + +=item C<-e> or C<--exec=SCRIPT> + +Execute (as from command prompt) C<SCRIPT> if it exists as a file, is readable, +and is executable, or evaluate (as a perl script) C<SCRIPT> for a checked out +copy of each I<important> revision of each RCS archive in CVSROOT. Executed +scripts are passed C<CVSROOT FILE REVISION FNO>, where C<CVSROOT> is what +you'd think, C<FILE> is the path to the file relative to C<CVSROOT> and +suitable for use as an argument to C<cvs co>, C<cvs rlog>, and so on, +C<REVISION> is the revision of the checked out file, and C<FNO> is the file +number of the open, read-only file descriptor containing the checked out +contents of revision C<REVISION> of C<FILE>. An evaluated C<SCRIPT> will find +the same four arguments in the same order in C<@_>, except that C<FNO> will be +an open file handle. + +With C<--all-revisions>, execute or evaluate C<SCRIPT> for a checked out +version of each revsion in the RCS archive. + +=item C<-v> or C<--verbose> + +Print verbose debugging information (or, when specified with C<--help>, act +like C<perldoc validate_repo.pl>). + +=head1 ARGUMENTS + +=over + +=item C<modules> + +The module in the repository to examine. Defaults to the contents of the +F<./CVS/Repository> file when it exists and is readable, then to F<.> +(all modules). + +=head1 EXAMPLES + + setenv CVSROOT /release/111/cvs + validate_repo.pl + + + validate_repo.pl -d /another/cvsroot --verbose --exec ' + system "grep \"This string means Im a bad, bad file!\" <&" + . fileno( $_[3] ) + . ">/dev/null" + or die "Revision $_[2] of $_[0]/$_[1],v is bad, bad, bad!"' + +=head1 SEE ALSO + +None. + +=cut + +###################################################################### +# MODULES # +###################################################################### +use strict; + +use Fcntl qw( F_GETFD F_SETFD ); +use File::Find; +use File::Basename; +use File::Path; +use File::Spec; +use Getopt::Long; +use IO::File; +use Pod::Usage; + +###################################################################### +# GLOBALS # +###################################################################### + +use vars qw( + $all_revisions + $cvsroot + @extra_files + @ignore_files + $ignored_files + @invalid_revs + @list_of_broken_files + @scripts + $total_files + $total_interesting_revisions + $total_revisions + $verbose + ); + + + +###################################################################### +# SUBROUTINES # +###################################################################### + +###################################################################### +# +# NAME : +# main +# +# PURPOSE : +# To search the repository for broken files +# +# PARAMETERS : +# NONE +# +# GLOBALS : +# $cvsroot - The CVS repository to search through. +# $ENV{ CVSROOT } - The default CVS repository to search through. +# @list_of_broken_files - The list of files that need to +# be fixed. +# $verbose - is verbose mode on? +# @scripts - scripts to run on checked out files. +# $total_revisions - The number of revisions considered +# $total_interesting_revisions - The number of revisions used +# $total_files - The total number of files looked at. +# +# RETURNS : +# A list of broken files +# +# COMMENTS : +# Do not run this script inside the repository. Choose +# a nice safe spot( like /tmp ) outside of the repository. +# +###################################################################### +sub main +{ + my $help; + + $ignored_files = 0; + $total_files = 0; + $total_interesting_revisions = 0; + $total_revisions = 0; + + Getopt::Long::Configure( "bundling" ); + unless( GetOptions( + 'all-revisions|a!' => \$all_revisions, + 'cvsroot|d=s' => \$cvsroot, + 'exec|e=s' => \@scripts, + 'help|h|?!' => \$help, + 'verbose|v!' => \$verbose + ) + ) + { + pod2usage( 2 ); + exit 2; + } + + pod2usage( -exitval => 2, + -verbose => $verbose ? 2 : 1, + -output => \*STDOUT ) + if $help; + + verbose( "Verbose Mode Turned On\n" ); + + if( !$cvsroot && -f "CVS/Root" && -r "CVS/Root" ) + { + my $file = new IO::File "< CVS/Root"; + $cvsroot = $file->getline; + chomp $cvsroot; + } + $cvsroot = $ENV{'CVSROOT'} unless $cvsroot; + pod2usage( "error: Must set CVSROOT" ) unless $cvsroot; + + if( $cvsroot =~ /^:\w+:/ && $cvsroot !~ /^:local:/ + || $cvsroot =~ /@/ ) + { + print STDERR "CVSROOT must be :local:\n"; + exit 2; + } + + for (@scripts) + { + $_ = File::Spec->rel2abs( $_ ) unless /\n/ || !-x $_; + } + + + if( !scalar( @ARGV ) && -f "CVS/Repository" && -r "CVS/Repository" ) + { + my $file = new IO::File "< CVS/Repository"; + my $module = $file->getline; + chomp $module; + push @ARGV, $module; + } + + push @ARGV, "." unless( scalar @ARGV ); + + foreach my $directory_to_look_at ( @ARGV ) + { + $directory_to_look_at = File::Spec->catfile( $cvsroot, + $directory_to_look_at ); + + my $sym_count = 0; + while( -l $directory_to_look_at ) + { + $directory_to_look_at = readlink( $directory_to_look_at ); + $sym_count += 1; + die( "Encountered too many symlinks for CVSROOT ($cvsroot)\n" ) + if( $sym_count > 5 ); + } + + # Remove indirections. + $directory_to_look_at =~ s#(/+.)*$##o; + + verbose( "Processing: $directory_to_look_at\n" ); + @ignore_files = get_ignore_files_from_cvsroot( $directory_to_look_at ); + find( \&process_file, $directory_to_look_at ); + } + + print "List of corrupted files\n" if @list_of_broken_files; + foreach my $broken ( @list_of_broken_files ) + { + print( "**** File: $broken\n" ); + } + + print "List of Files containing invalid revisions:\n" + if @invalid_revs; + foreach ( @invalid_revs ) + { + print( "**** File: ($_->{'rev'}) $_->{'file'}\n" ); + } + + print "List of Files That Don't belong in Repository:\n" + if @extra_files; + foreach my $extra ( @extra_files ) + { + print( "**** File: $extra\n" ); + } + print( "Total Files: $total_files Corrupted files: " + . scalar( @list_of_broken_files ) + . " Invalid revs: " + . scalar( @invalid_revs ) + . " Extra files: " + . scalar( @extra_files ) + . " Ignored Files: $ignored_files\n" ); + print( "Total Revisions: $total_revisions Interesting Revisions: $total_interesting_revisions\n" ); +} + + + +sub verbose +{ + print STDERR @_ if $verbose; +} + + + +###################################################################### +# +# NAME : +# process_file +# +# PURPOSE : +# This function is called by the find function, its purpose +# is to decide if it is important to look at a file or not. When +# a file is important, we log it or call &look_at_cvs_file on it. +# +# ALGORITHM +# 1) If the file is an archive file, we call &look_at_cvs_file on +# it. +# 2) Else, if the file is not in the ignore list, we store its name +# for later. +# +# PARAMETERS : +# NONE +# +# GLOBALS : +# $cvsroot - The CVS repository to search through +# @ignore_files - File patterns we can afford to ignore. +# $File::Find::name - The absolute path of the file being examined. +# +# RETURNS : +# NONE +# +# COMMENTS : +# NONE +# +###################################################################### +sub process_file +{ + if( ! -d $File::Find::name ) + { + my $path = $File::Find::name; + $path =~ s#^$cvsroot/(\./)*##; + $total_files++; + + verbose( "Examining `$path'\n" ); + + if( $path =~ s/,v$// ) + { + look_at_cvs_file( $path ); + } + elsif( !grep { $path =~ $_ } @ignore_files ) + { + push @extra_files, $path; + verbose( "Adding unrecognized file `$path' to corrupted list.\n" ); + } + else + { + $ignored_files++; + verbose( "Ignoring `$path'\n" ); + } + } +} + +###################################################################### +# +# NAME : +# look_at_cvs_file +# +# PURPOSE : +# To decide if a file is broken or not. The algorithm is: +# a) Get the revision history for the file. +# - If that fails the file is broken, save the fact +# and continue processing other files. +# - If that succeeds we have a list of revisions. +# b) For each revision call &check_revision on the file. +# - If that fails the file is broken, save the fact +# and continue processing other files. +# c) Continue on +# +# PARAMETERS : +# $file - The path of the file to look at, relative to $cvsroot and +# suitable for use as an argument to `cvs co', `cvs rlog', and +# the rest of CVS's r* commands. +# +# GLOBALS : +# NONE +# +# RETURNS : +# NONE +# +# COMMENTS : +# We have to handle Attic files in a special manner. +# Basically remove the Attic from the string if it +# exists at the end of the $path variable. +# +###################################################################### +sub look_at_cvs_file +{ + my( $file ) = @_; + my( $name, $path ) = fileparse( $file ); + + $file = $path . $name if $path =~ s#Attic/$##; + + my( $finfo, $rinfo ) = get_history( $file ); + + unless( defined $rinfo ) + { + verbose( "\t`$file' is corrupted. It was determined to contain no\n" + . "\trevisions via a cvs rlog command\n" ); + push( @list_of_broken_files, $file ); + return(); + } + + my @int_revisions = + $all_revisions ? keys %$rinfo + : find_interesting_revisions( keys %$rinfo ); + + foreach my $revision ( @int_revisions ) + { + verbose( "\t\tLooking at Revision: $revision\n" ); + if( !check_revision( $file, $revision, $finfo, $rinfo ) ) + { + verbose( "\t$file is corrupted in revision: $revision\n" ); + push( @list_of_broken_files, $file ); + return(); + } + } +} + +###################################################################### +# +# NAME : +# get_history +# +# PURPOSE : +# To retrieve an array of revision numbers. +# +# PARAMETERS : +# $file - The file to retrieve the revision numbers for +# +# GLOBALS : +# $cvsroot - the CVSROOT we are examining +# +# RETURNS : +# On Success - A hash of revision info, indexed by revision numbers. +# On Failure - undef. +# +# COMMENTS : +# The $_ is saved off because The File::find functionality +# expects the $_ to not have been changed. +# The -N option for the rlog command means to spit out +# tags or branch names. +# +###################################################################### +sub get_history +{ + my( $file ) = @_; + $file =~ s/(["\$`\\])/\\$1/g; + my %finfo; # Info about the file. + my %rinfo; # Info about revisions in the file. + my $revision; + + my $fh = new IO::File( "cvs -d $cvsroot rlog -N \"$file\"" + . ($verbose ? "" : " 2>&1") . " |" ) + or die( "unable to run `cvs rlog', help" ); + + my $ignore = -1; + while( my $line = $fh->getline ) + { + if( $ignore == 1 ) + { + if( ( $revision ) = $line =~ /^revision (.*?)(\tlocked by: \S+;)?$/ ) + { + unless($revision =~ m/^\d+\.\d+(?:\.\d+\.\d+)*$/) + { + push @invalid_revs, { 'file' => $file, 'rev' => $revision }; + verbose( "Adding invalid revision `$revision' of file `$file' to invalid revs list.\n" ); + } + + $ignore++; + next; + } + + # We require ---- before a ^revision tag, not a revision + # after every ----. + $ignore = 0; + } + if( $ignore == 2 ) + { + if( my ( $date, $author, $state ) = + $line =~ /^date: (\S+ \S+); author: ([^;]+); state: (\S+);/ ) + { + $rinfo{$revision} = + { + 'date' => $date, + 'author' => $author, + 'state' => $state + } + } + else + { + die "Couldn't read date/author/state for revision $revision\n" + . "of $file from `cvs rlog'.\n" + . "line = $line"; + } + $ignore = 0; + next; + } + if( $ignore == -1 ) + { + # Until we find the first ---- below, we can read general file info + if( my ( $kwmode ) = + $line =~ /^keyword substitution: (\S+)$/ ) + { + $finfo{'kwmode'} = $kwmode; + next; + } + } + # rlog outputs a "----" line before the actual revision + # without this we'll pick up peoples comments if they + # happen to start with revision + if( $line =~ /^----------------------------$/ ) + { + # Catch this case when $ignore == -1 or 0 + $ignore = 1; + next; + } + } + if( $verbose ) + { + for (keys %rinfo) + { + verbose( "Revision $_: " ); + verbose( join( ", ", %{$rinfo{$_}} ) ); + verbose( "\n" ); + } + } + + die "Syserr closing pipe from `cvs co': $!" + if !$fh->close && $!; + return if $?; + + return( \%finfo, %rinfo ? \%rinfo : undef ); +} + +###################################################################### +# +# NAME : +# check_revision +# +# PURPOSE : +# Given a file and a revision number ensure that we can check out that +# file. +# +# If the user has specified any scripts (passed in as arguments to --exec +# and stored in @scripts), run them on the checked out revision. If +# executable scripts exit with a non-zero status or evaluated scripts set +# $@ (die), print $status or $@ as a warning. +# +# PARAMETERS : +# $file - The file to look at. +# $revision - The revision to look at. +# $rinfo - A reference to a hash containing information about the +# revisions in $file. +# For instance, $rinfo->{$revision}->{'date'} contains the +# date revision $revision was committed. +# +# GLOBALS : +# NONE +# +# RETURNS : +# If we can get the File - 1 +# If we can not get the File - 0 +# +# COMMENTS : +# cvs command line options are as followed: +# -n - Do not run any checkout program as specified by the -o +# option in the modules file +# -p - Put all output to standard out. +# -r - The revision of the file that we would like to look at. +# -ko - Get the revision exactly as checked in - do not allow +# RCS keyword substitution. +# Please note that cvs will return 0 for being able to successfully +# read the file and 1 for failure to read the file. +# +###################################################################### +sub check_revision +{ + my( $file, $revision, $finfo, $rinfo ) = @_; + $file =~ s/(["\$`\\])/\\$1/g; + + # Allow binaries to be checked out as such. Otherwise, use -ko to avoid + # replacing keywords in the files. + my $kwmode = $finfo->{'kwmode'} eq 'b' ? '' : ' -ko'; + my $command = "cvs -d $cvsroot co$kwmode -npr $revision \"$file\""; + my $ret_code; + verbose( "Executing `$command'.\n" ); + if( @scripts ) + { + my $fh = new IO::File $command . ($verbose ? "" : " 2>&1") . " |"; + fcntl( $fh, F_SETFD, 0 ) + or die "Can't clear close-on-exec flag on filehandle: $!"; + my $count; + foreach my $script (@scripts) + { + $count++; + if( $script !~ /\n/ && -x $script ) + { + # exec external script + my $status = system $script, $cvsroot, $file, $revision, + fileno( $fh ); + warn "`$script $cvsroot $file $revision " + . fileno( $fh ) + . "' exited with code $status" + if $status; + } + else + { + # eval script + @_ = ($cvsroot, $file, $revision, $fh); + eval $script; + warn "script $count ($cvsroot, $file, $revision, $fh) exited abnormally: $@" + if $@; + } + } + # Read any data left so the close will work even if our called script + # didn't finish reading the data. + () = $fh->getlines; # force list context + die "Syserr closing pipe from `cvs co': $!" + if !$fh->close && $!; + $ret_code = $?; + } + else + { + $ret_code = 0xffff & system "$command >/dev/null 2>&1"; + } + + return !$ret_code; +} + +###################################################################### +# +# NAME : +# find_interesting_revisions +# +# PURPOSE : +# CVS stores information in a logical manner. We only really +# need to look at some interestin revisions. These are: +# The first version +# And the last version on every branch. +# This is because cvs stores changes descending from +# main line. ie suppose the last version on mainline is 1.6 +# version 1.6 of the file is stored in toto. version 1.5 +# is stored as a diff between 1.5 and 1.6. 1.4 is stored +# as a diff between 1.5 and 1.4. +# branches are stored a little differently. They are +# stored in ascending order. Suppose there is a branch +# on 1.4 of the file. The first branches revision number +# would be 1.4.1.1. This is stored as a diff between +# version 1.4 and 1.4.1.1. The 1.4.1.2 version is stored +# as a diff between 1.4.1.1 and 1.4.1.2. Therefore +# we are only interested in the earliest revision number +# and the highest revision number on a branch. +# +# PARAMETERS : +# @revisions - The list of revisions to find interesting ones +# +# GLOBALS : +# NONE +# +# RETURNS : +# @new_revisions - The list of revisions that we find interesting +# +# COMMENTS : +# +###################################################################### +sub find_interesting_revisions +{ + my( @revisions ) = @_; + my @new_revisions; + my %max_branch_revision; + my $branch_number; + my $branch_rev; + my $key; + my $value; + + foreach my $revision( @revisions ) + { + ( $branch_number, $branch_rev ) = branch_split( $revision ); + $max_branch_revision{$branch_number} = $branch_rev + if( !exists $max_branch_revision{$branch_number} + || $max_branch_revision{$branch_number} < $branch_rev ); + } + + push( @new_revisions, "1.1" ) unless (exists $max_branch_revision{1} + && $max_branch_revision{1} == 1); + while( ( $key, $value ) = each ( %max_branch_revision ) ) + { + push( @new_revisions, $key . "." . $value ); + } + + my $nrc; + my $rc; + + $rc = @revisions; + $nrc = @new_revisions; + + $total_revisions += $rc; + $total_interesting_revisions += $nrc; + + verbose( "\t\tTotal Revisions: $rc Interesting Revisions: $nrc\n" ); + + return( @new_revisions ); +} + + + +###################################################################### +# +# NAME : +# branch_split +# +# PURPOSE : +# To split up a revision number up into the branch part and +# the number part. For Instance: +# 1.1.1.1 - is split 1.1.1 and 1 +# 2.1 - is split 2 and 1 +# 1.3.4.5.7.8 - is split 1.3.4.5.7 and 8 +# +# PARAMETERS : +# $revision - The revision to look at. +# +# GLOBALS : +# NONE +# +# RETURNS : +# ( $branch, $revision ) - +# $branch - The branch part of the revision number +# $revision - The revision part of the revision number +# +# COMMENTS : +# NONE +# +###################################################################### +sub branch_split +{ + my( $revision ) = @_; + my $branch; + my $version; + my @split_rev; + my $count; + + @split_rev = split /\./, $revision; + + my $numbers = @split_rev; + @split_rev = reverse( @split_rev ); + $branch = pop( @split_rev ); + for( $count = 0; $count < $numbers - 2 ; $count++ ) + { + $branch .= "." . pop( @split_rev ); + } + + return( $branch, pop( @split_rev ) ); +} + +###################################################################### +# +# NAME : +# get_ignore_files_from_cvsroot +# +# PURPOSE : +# Retrieve the list of files from the CVSROOT/ directory +# that should be ignored. +# These are the regular files (e.g., commitinfo, loginfo) +# and those specified in the checkoutlist file. +# +# PARAMETERS : +# The CVSROOT +# +# GLOBALS : +# NONE +# +# RETURNS : +# @ignore - the list of files to ignore +# +# COMMENTS : +# NONE +# +###################################################################### +sub get_ignore_files_from_cvsroot { + my( $cvsroot ) = @_; + my @ignore = ( + qr{CVS/fileattr$}o, + qr{^(./)?CVSROOT/.#[^/]*$}o, + qr{^(./)?CVSROOT/checkoutlist$}o, + qr{^(./)?CVSROOT/commitinfo$}o, + qr{^(./)?CVSROOT/config$}o, + qr{^(./)?CVSROOT/cvsignore$}o, + qr{^(./)?CVSROOT/cvswrappers$}o, + qr{^(./)?CVSROOT/editinfo$}o, + qr{^(./)?CVSROOT/history$}o, + qr{^(./)?CVSROOT/loginfo$}o, + qr{^(./)?CVSROOT/modules$}o, + qr{^(./)?CVSROOT/notify$}o, + qr{^(./)?CVSROOT/passwd$}o, + qr{^(./)?CVSROOT/postadmin$}o, + qr{^(./)?CVSROOT/postproxy$}o, + qr{^(./)?CVSROOT/posttag$}o, + qr{^(./)?CVSROOT/postwatch$}o, + qr{^(./)?CVSROOT/preproxy$}o, + qr{^(./)?CVSROOT/rcsinfo$}o, + qr{^(./)?CVSROOT/readers$}o, + qr{^(./)?CVSROOT/taginfo$}o, + qr{^(./)?CVSROOT/val-tags$}o, + qr{^(./)?CVSROOT/verifymsg$}o, + qr{^(./)?CVSROOT/writers$}o + ); + + my $checkoutlist_file = "$cvsroot/CVSROOT/checkoutlist"; + if( -f $checkoutlist_file && -r $checkoutlist_file ) + { + my $fh = new IO::File "<$checkoutlist_file" + or die "Unable to read checkoutlist file ($checkoutlist_file): $!\n"; + + my @list = $fh->getlines; + chomp( @list ); + $fh->close or die( "Unable to close checkoutlist file: $!\n" ); + + foreach my $line( @list ) + { + next if( $line =~ /^#/ || $line =~ /^\s*$/ ); + $line =~ s/^\s*(\S+)(\s+.*)?$/$1/; + push @ignore, qr{^(./)?CVSROOT/$line$}; + } + } + + return @ignore; +} + + + +###### +###### Go. +###### + +exit main @ARGV; + +# vim:tabstop=4:shiftwidth=4 |