#!/usr/bin/env perl # core-cpan-diff: Compare CPAN modules with their equivalent in core # Originally based on App::DualLivedDiff by Steffen Mueller. use strict; use warnings; use 5.010; use Getopt::Long; use File::Basename (); use File::Copy (); use File::Temp (); use File::Path (); use File::Spec::Functions; use Archive::Extract; use IO::Uncompress::Gunzip (); use File::Compare (); use ExtUtils::Manifest; use ExtUtils::MakeMaker (); BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } use lib 'Porting'; use Maintainers (); # if running from blead, we may be doing -Ilib, which means when we # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. # So preload the things we need, and tell it to check %INC first: use Archive::Tar; use IPC::Open3; use IO::Select; $Module::Load::Conditional::CHECK_INC_HASH = 1; # stop Archive::Extract whinging about lack of Archive::Zip $Archive::Extract::WARN = 0; # where, under the cache dir, to download tarballs to use constant SRC_DIR => 'tarballs'; # where, under the cache dir, to untar stuff to use constant UNTAR_DIR => 'untarred'; use constant DIFF_CMD => 'diff'; use constant WGET_CMD => 'wget'; sub usage { print STDERR "\n@_\n\n" if @_; print STDERR < \$scan_all, 'c|cachedir=s' => \$cache_dir, 'd|diff' => \$use_diff, 'diffopts:s' => \$diff_opts, 'f|force' => \$force, 'h|help' => \&usage, 'm|mirror=s' => \$mirror_url, 'o|output=s' => \$output_file, 'r|reverse' => \$reverse, 'u|upstream=s@' => \@wanted_upstreams, 'v|verbose' => \$verbose, 'x|crosscheck' => \$do_crosscheck, ) or usage; my @modules; usage("Cannot mix -a with module list") if $scan_all && @ARGV; if ($do_crosscheck) { usage("can't use -r, -d, --diffopts, -v with --crosscheck") if ( $reverse || $use_diff || $diff_opts || $verbose ); } else { $diff_opts = '-u -b' unless defined $diff_opts; usage("can't use -f without --crosscheck") if $force; } @modules = $scan_all ? grep $Maintainers::Modules{$_}{CPAN}, ( sort { lc $a cmp lc $b } keys %Maintainers::Modules ) : @ARGV; usage("No modules specified") unless @modules; my $outfh; if ( defined $output_file ) { open $outfh, '>', $output_file or die "ERROR: could not open file '$output_file' for writing: $!\n"; } else { open $outfh, ">&STDOUT" or die "ERROR: can't dup STDOUT: $!\n"; } if ( defined $cache_dir ) { die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; } else { $cache_dir = File::Temp::tempdir( CLEANUP => 1 ); } $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/"; my $test_file = "modules/07mirror.yml"; my_getstore( cpan_url( $mirror_url, $test_file ), catfile( $cache_dir, $test_file ) ) or die "ERROR: not a CPAN mirror '$mirror_url'\n"; if ($do_crosscheck) { do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules ); } else { do_compare( \@modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose, $use_diff, $reverse, $diff_opts, \@wanted_upstreams ); } } # construct a CPAN url sub cpan_url { my ( $mirror_url, @path ) = @_; return $mirror_url unless @path; my $cpan_path = join( "/", map { split "/", $_ } @path ); $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing return $mirror_url . $cpan_path; } # compare a list of modules against their CPAN equivalents sub do_compare { my ( $modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose, $use_diff, $reverse, $diff_opts, $wanted_upstreams ) = @_; # first, make sure we have a directory where they can all be untarred, # and if its a permanent directory, clear any previous content my $untar_dir = catdir( $cache_dir, UNTAR_DIR ); my $src_dir = catdir( $cache_dir, SRC_DIR ); for my $d ( $src_dir, $untar_dir ) { next if -d $d; mkdir $d or die "mkdir $d: $!\n"; } my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE; my %seen_dist; for my $module (@$modules) { warn "Processing $module ...\n" if defined $output_file; my $m = $Maintainers::Modules{$module} or die "ERROR: No such module in Maintainers.pl: '$module'\n"; unless ( $m->{CPAN} ) { print $outfh "WARNING: $module is not dual-life; skipping\n"; next; } my $dist = $m->{DISTRIBUTION}; die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; if ( $seen_dist{$dist}++ ) { warn "WARNING: duplicate entry for $dist in $module\n"; } my $upstream = $m->{UPSTREAM} || 'UNKNOWN'; next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams ); print $outfh "\n$module - " . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n"; print $outfh " upstream is: " . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n"; my $cpan_dir; eval { $cpan_dir = get_distribution( $src_dir, $mirror_url, $untar_dir, $module, $dist ); }; if ($@) { print $outfh " ", $@; print $outfh " (skipping)\n"; next; } my @perl_files = Maintainers::get_module_files($module); my $manifest = catfile( $cpan_dir, 'MANIFEST' ); die "ERROR: no such file: $manifest\n" unless -f $manifest; my $cpan_files = ExtUtils::Manifest::maniread($manifest); my @cpan_files = sort keys %$cpan_files; ( my $main_pm = $module ) =~ s{::}{/}g; $main_pm .= ".pm"; my ( $excluded, $map, $customized ) = get_map( $m, $module, \@perl_files ); my %perl_unseen; @perl_unseen{@perl_files} = (); my %perl_files = %perl_unseen; foreach my $cpan_file (@cpan_files) { my $mapped_file = cpan_to_perl( $excluded, $map, $customized, $cpan_file ); unless ( defined $mapped_file ) { print $outfh " Excluded: $cpan_file\n" if $verbose; next; } if ( exists $perl_files{$mapped_file} ) { delete $perl_unseen{$mapped_file}; } else { # some CPAN files foo are stored in core as foo.packed, # which are then unpacked by 'make test_prep' my $packed_file = "$mapped_file.packed"; if ( exists $perl_files{$packed_file} ) { if ( !-f $mapped_file and -f $packed_file ) { print $outfh <parse_version($mapped_file) || 'unknown'; my $cv = MM->parse_version($abs_cpan_file) || 'unknown'; if ( $pv ne $cv ) { print $outfh " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n"; } } } } elsif ($verbose) { if ( $cpan_file eq $relative_mapped_file ) { print $outfh " Unchanged: $cpan_file\n"; } else { print $outfh " Unchanged: $cpan_file $relative_mapped_file\n"; } } } for ( sort keys %perl_unseen ) { my $relative_mapped_file = relatively_mapped($_); if ( customized( $m, $relative_mapped_file ) ) { print $outfh " Customized: $_\n"; } else { print $outfh " Perl only: $_\n" unless $use_diff; } } } } sub relatively_mapped { my $relative = shift; $relative =~ s/^(cpan|dist|ext)\/.*?\///; return $relative; } # given FooBar-1.23_45.tar.gz, return FooBar sub distro_base { my $d = shift; $d =~ s/\.tar\.gz$//; $d =~ s/\.gip$//; $d =~ s/[\d\-_\.]+$//; return $d; } # process --crosscheck action: # ie list all distributions whose CPAN versions differ from that listed in # Maintainers.pl sub do_crosscheck { my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_; my $file = '02packages.details.txt'; my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); my $path = catfile( $download_dir, $file ); my $gzfile = "$path.gz"; # grab 02packages.details.txt my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" ); if ( !-f $gzfile or $force ) { unlink $gzfile; my_getstore( $url, $gzfile ); } unlink $path; IO::Uncompress::Gunzip::gunzip( $gzfile, $path ) or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; # suck in the data from it open my $fh, '<', $path or die "ERROR: open: $file: $!\n"; my %distros; my %modules; while (<$fh>) { next if 1 .. /^$/; chomp; my @f = split ' ', $_; if ( @f != 3 ) { warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; next; } my $distro = $f[2]; $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ $modules{ $f[0] } = $distro; ( my $short_distro = $distro ) =~ s{^.*/}{}; $distros{ distro_base($short_distro) }{$distro} = 1; } for my $module (@$modules) { my $m = $Maintainers::Modules{$module} or die "ERROR: No such module in Maintainers.pl: '$module'\n"; unless ( $m->{CPAN} ) { print $outfh "\nWARNING: $module is not dual-life; skipping\n"; next; } # given an entry like # Foo::Bar 1.23 foo-bar-1.23.tar.gz, # first compare the module name against Foo::Bar, and failing that, # against foo-bar my $pdist = $m->{DISTRIBUTION}; die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; my $cdist = $modules{$module}; ( my $short_pdist = $pdist ) =~ s{^.*/}{}; unless ( defined $cdist ) { my $d = $distros{ distro_base($short_pdist) }; unless ( defined $d ) { print $outfh "\n$module: Can't determine current CPAN entry\n"; next; } if ( keys %$d > 1 ) { print $outfh "\n$module: (found more than one CPAN candidate):\n"; print $outfh " perl: $pdist\n"; print $outfh " CPAN: $_\n" for sort keys %$d; next; } $cdist = ( keys %$d )[0]; } if ( $cdist ne $pdist ) { print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; } } } # get the EXCLUDED and MAP entries for this module, or # make up defauts if they don't exist sub get_map { my ( $m, $module_name, $perl_files ) = @_; my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)}; $excluded ||= []; $customized ||= []; return $excluded, $map, $customized if $map; # all files under ext/foo-bar (plus maybe some under t/lib)??? my $ext; for (@$perl_files) { if (m{^((?:ext|dist|cpan)/[^/]+/)}) { if ( defined $ext and $ext ne $1 ) { # more than one ext/$ext/ undef $ext; last; } $ext = $1; } elsif (m{^t/lib/}) { next; } else { undef $ext; last; } } if ( defined $ext ) { $map = { '' => $ext },; } else { ( my $base = $module_name ) =~ s{::}{/}g; $base = "lib/$base"; $map = { 'lib/' => 'lib/', '' => "$base/", }; } return $excluded, $map, $customized; } # Given an exclude list and a mapping hash, convert a CPAN filename # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). # Returns an empty list for an excluded file sub cpan_to_perl { my ( $excluded, $map, $customized, $cpan_file ) = @_; for my $exclude (@$excluded) { next if $exclude ~~ $customized; # may be a simple string to match exactly, or a pattern if ( ref $exclude ) { return if $cpan_file =~ $exclude; } else { return if $cpan_file eq $exclude; } } my $perl_file = $cpan_file; # try longest prefix first, then alphabetically on tie-break for my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map ) { last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; } return $perl_file; } # do LWP::Simple::getstore, possibly without LWP::Simple being available my $lwp_simple_available; sub my_getstore { my ( $url, $file ) = @_; File::Path::mkpath( File::Basename::dirname($file) ); unless ( defined $lwp_simple_available ) { eval { require LWP::Simple }; $lwp_simple_available = $@ eq ''; } if ($lwp_simple_available) { return LWP::Simple::is_success( LWP::Simple::getstore( $url, $file ) ); } elsif ( $url =~ qr{\Afile://(?:localhost)?/} ) { ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{}; File::Copy::copy( $local_path, $file ); } else { return system( WGET_CMD, "-O", $file, $url ) == 0; } } # download and unpack a distribution # Returns the full pathname of the extracted directory # (eg '/tmp/XYZ/Foo_bar-1.23') # cache_dir: where to download the .tar.gz file to # mirror_url: CPAN mirror to download from # untar_dir: where to untar or unzup the file # module: name of module # dist: name of the distribution sub get_distribution { my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_; $dist =~ m{.+/([^/]+)$} or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; my $filename = $1; my $download_file = catfile( $src_dir, $filename ); # download distribution if ( -f $download_file and !-s $download_file ) { # wget can leave a zero-length file on failed download unlink $download_file; } unless ( -f $download_file ) { # not cached $dist =~ /^([A-Z])([A-Z])/ or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; my $url = cpan_url( $mirror_url, "modules/by-authors/id/$1/$1$2/$dist" ); my_getstore( $url, $download_file ) or die "ERROR: Could not fetch '$url'\n"; } # get the expected name of the extracted distribution dir my $path = catfile( $untar_dir, $filename ); $path =~ s/\.tar\.gz$// or $path =~ s/\.zip$// or die "ERROR: downloaded file does not have a recognised suffix: $path\n"; # extract it unless we already have it cached or tarball is newer if ( !-d $path || ( -M $download_file < -M $path ) ) { my $ae = Archive::Extract->new( archive => $download_file ); $ae->extract( to => $untar_dir ) or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; $path = $ae->extract_path; } die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; return $path; } # produce the diff of a single file sub file_diff { my $outfh = shift; my $cpan_file = shift; my $perl_file = shift; my $reverse = shift; my $diff_opts = shift; my @cmd = ( DIFF_CMD, split ' ', $diff_opts ); if ($reverse) { push @cmd, $perl_file, $cpan_file; } else { push @cmd, $cpan_file, $perl_file; } return `@cmd`; } sub customized { my ( $module_data, $file ) = @_; return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} }; } run();