summaryrefslogtreecommitdiff
path: root/lib/CPAN/Checksums.pm
blob: 342e3920d49684e390b347dabe9e676620723f5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
package CPAN::Checksums;

use strict;
use vars qw(
            $CAUTION
            $DIRNAME
            $IGNORE_MATCH
            $MIN_MTIME_CHECKSUMS
            $SIGNING_KEY
            $SIGNING_PROGRAM
            $TRY_SHORTNAME
            $VERSION
            @EXPORT_OK
            @ISA
           );

require Exporter;

@ISA = qw(Exporter);
@EXPORT_OK = qw(updatedir);
$VERSION = "2.10";
$VERSION =~ s/_//;
$CAUTION ||= 0;
$TRY_SHORTNAME ||= 0;
$SIGNING_PROGRAM ||= 'gpg --clearsign --default-key ';
$SIGNING_KEY ||= '';
$MIN_MTIME_CHECKSUMS ||= 0;
$IGNORE_MATCH = qr{(?i-xsm:readme$)};

use DirHandle ();
use IO::File ();
use Digest::MD5 ();
use Compress::Bzip2();
use Compress::Zlib ();
use File::Spec ();
use File::Temp;
use Data::Dumper ();
use Data::Compare ();
use Digest::SHA ();

sub _dir_to_dref {
  my($dirname,$old_dref) = @_;
  my($dref) = {};
  my($dh)= DirHandle->new;
  my($fh) = new IO::File;
  $dh->open($dirname) or die "Couldn't opendir $dirname\: $!";
  my(%shortnameseen);
 DIRENT: for my $de ($dh->read) {
    next if $de =~ /^\./;
    next if substr($de,0,9) eq "CHECKSUMS";
    next if $IGNORE_MATCH && $de =~ $IGNORE_MATCH;

    my $abs = File::Spec->catfile($dirname,$de);

    #
    # SHORTNAME offers an 8.3 name, probably not needed but it was
    # always there,,,
    #
    if ($TRY_SHORTNAME) {
      my $shortname = lc $de;
      $shortname =~ s/\.tar[._-]gz$/\.tgz/;
      my $suffix;
      ($suffix = $shortname) =~ s/.*\.//;
      substr($suffix,3) = "" if length($suffix) > 3;
      my @p;
      if ($shortname =~ /\-/) {
        @p = $shortname =~ /(.{1,16})-.*?([\d\.]{2,8})/;
      } else {
        @p = $shortname =~ /(.{1,8}).*?([\d\.]{2,8})/;
      }
      $p[0] ||= lc $de;
      $p[0] =~ s/[^a-z0-9]//g;
      $p[1] ||= 0;
      $p[1] =~ s/\D//g;
      my $counter = 7;
      while (length($p[0]) + length($p[1]) > 8) {
        substr($p[0], $counter) = "" if length($p[0]) > $counter;
        substr($p[1], $counter) = "" if length($p[1]) > $counter--;
      }
      my $dot = $suffix ? "." : "";
      $shortname = "$p[0]$p[1]$dot$suffix";
      while (exists $shortnameseen{$shortname}) {
        my($modi) = $shortname =~ /([a-z\d]+)/;
        $modi++;
        $shortname = "$modi$dot$suffix";
        if (++$counter > 1000){ # avoid endless loops and accept the buggy choice
          warn "Warning: long loop on shortname[$shortname]de[$de]";
          last;
        }
      }
      $dref->{$de}->{shortname} = $shortname;
      $shortnameseen{$shortname} = undef; # for exists check good enough
    }

    #
    # STAT facts
    #
    if (-l File::Spec->catdir($dirname,$de)){
      # Symlinks are a mess on a replicated, database driven system,
      # but as they are not forbidden, we cannot ignore them. We do
      # have a directory with nothing but a symlink in it. When we
      # ignored the symlink, we did not write a CHECKSUMS file and
      # CPAN.pm issued lots of warnings:-(
      $dref->{$de}{issymlink} = 1;
    }
    if (-d File::Spec->catdir($dirname,$de)){
      $dref->{$de}{isdir} = 1;
    } else {
      my @stat = stat $abs or next DIRENT;
      $dref->{$de}{size} = $stat[7];
      my(@gmtime) = gmtime $stat[9];
      $gmtime[4]++;
      $gmtime[5]+=1900;
      $dref->{$de}{mtime} = sprintf "%04d-%02d-%02d", @gmtime[5,4,3];
      _add_digests($de,$dref,"Digest::SHA",[256],"sha256",$abs,$old_dref);
      my $can_reuse_old_md5 = 1;
    COMPARE: for my $param (qw(size mtime sha256)) {
        if (!exists $old_dref->{$de}{$param} ||
            $dref->{$de}{$param} ne $old_dref->{$de}{$param}) {
          $can_reuse_old_md5 = 0;
          last COMPARE;
        }
      }
      if ( $can_reuse_old_md5 ) {
        for my $param (qw(md5 md5-ungz md5-unbz2)) {
          next unless exists $old_dref->{$de}{$param};
          $dref->{$de}{$param} = $old_dref->{$de}{$param};
        }
      } else {
        _add_digests($de,$dref,"Digest::MD5",[],"md5",$abs,$old_dref);
      }

    } # ! -d
  }
  $dh->close;
  $dref;
}

sub _read_old_ddump {
  my($ckfn) = @_;
  my $is_signed = 0;
  my($fh) = new IO::File;
  my $old_ddump = "";
  if ($fh->open($ckfn)) {
    local $/ = "\n";
    while (<$fh>) {
      next if /^\#/;
      $is_signed = 1 if /SIGNED MESSAGE/;
      $old_ddump .= $_;
    }
    close $fh;
  }
  return($old_ddump,$is_signed);
}

sub updatedir ($) {
  my($dirname) = @_;
  my $ckfn = File::Spec->catfile($dirname, "CHECKSUMS"); # checksum-file-name
  my($old_ddump,$is_signed) = _read_old_ddump($ckfn);
  my($old_dref) = makehashref($old_ddump);
  my $dref = _dir_to_dref($dirname,$old_dref);
  local $Data::Dumper::Indent = 1;
  local $Data::Dumper::Quotekeys = 1;
  local $Data::Dumper::Sortkeys = 1;
  my $ddump = Data::Dumper->new([$dref],["cksum"])->Dump;
  my @ckfnstat = stat $ckfn;
  if ($old_ddump) {
    local $DIRNAME = $dirname;
    if ( !!$SIGNING_KEY == !!$is_signed ) { # either both or neither
      if (!$MIN_MTIME_CHECKSUMS || $ckfnstat[9] > $MIN_MTIME_CHECKSUMS ) {
        # recent enough
        return 1 if $old_ddump eq $ddump;
        return 1 if ckcmp($old_dref,$dref);
      }
    }
    if ($CAUTION) {
      my $report = investigate($old_dref,$dref);
      warn $report if $report;
    }
  }
  my $ft = File::Temp->new(
                           DIR => $dirname,
                           TEMPLATE => "CHECKSUMS.XXXX",
                           CLEANUP => 0,
                          ) or die;
  my $tckfn = $ft->filename;
  close $ft;
  my($fh) = new IO::File;
  open $fh, ">$tckfn\0" or die "Couldn't open >$tckfn\: $!";

  local $\;
  if ($SIGNING_KEY) {
    print $fh "0&&<<''; # this PGP-signed message is also valid perl\n";
    close $fh;
    open $fh, "| $SIGNING_PROGRAM $SIGNING_KEY >> $tckfn"
        or die "Could not call gpg: $!";
    $ddump .= "__END__\n";
  }

  my $message = sprintf "# CHECKSUMS file written on %s GMT by CPAN::Checksums (v%s)\n%s",
      scalar gmtime, $VERSION, $ddump;
  print $fh $message;
  my $success = close $fh;
  if ($SIGNING_KEY && !$success) {
    warn "Couldn't run '$SIGNING_PROGRAM $SIGNING_KEY'!
Writing to $tckfn directly";
    open $fh, ">$tckfn\0" or die "Couldn't open >$tckfn\: $!";
    print $fh $message;
    close $fh or warn "Couldn't close $tckfn: $!";
  }
  chmod 0644, $ckfn or die "Couldn't chmod to 0644 for $ckfn\: $!" if -f $ckfn;
  rename $tckfn, $ckfn or die "Could not rename: $!";
  chmod 0444, $ckfn or die "Couldn't chmod to 0444 for $ckfn\: $!";
  return 2;
}

sub _add_digests ($$$$$$$) {
  my($de,$dref,$module,$constructor_args,$keyname,$abs,$old_dref) = @_;
  my($fh) = new IO::File;
  my $dig = $module->new(@$constructor_args);
  $fh->open("$abs\0") or die "Couldn't open $abs: $!";
  binmode($fh); # make sure it's called as a function, solaris with
                # perl 5.8.4 complained about missing method in
                # IO::File
  $dig->addfile($fh);
  $fh->close;
  my $digest = $dig->hexdigest;
  $dref->{$de}{$keyname} = $digest;
  $dig = $module->new(@$constructor_args);
  if ($de =~ /\.gz$/) {
    my($buffer, $zip);
    if (exists $old_dref->{$de}{$keyname} &&
        $dref->{$de}{$keyname} eq $old_dref->{$de}{$keyname} &&
        exists $old_dref->{$de}{"$keyname-ungz"}
       ) {
      $dref->{$de}{"$keyname-ungz"} = $old_dref->{$de}{"$keyname-ungz"};
      return;
    }
    if ($zip  = Compress::Zlib::gzopen($abs, "rb")) {
      $dig->add($buffer)
          while $zip->gzread($buffer) > 0;
      $dref->{$de}{"$keyname-ungz"} = $dig->hexdigest;
      $zip->gzclose;
    }
  } elsif ($de =~ /\.bz2$/) {
    my($buffer, $zip);
    if (exists $old_dref->{$de}{$keyname} &&
        $dref->{$de}{$keyname} eq $old_dref->{$de}{$keyname} &&
        exists $old_dref->{$de}{"$keyname-unbz2"}
       ) {
      $dref->{$de}{"$keyname-unbz2"} = $old_dref->{$de}{"$keyname-unbz2"};
      return;
    }
    if ($zip  = Compress::Bzip2::bzopen($abs, "rb")) {
      $dig->add($buffer)
          while $zip->bzread($buffer) > 0;
      $dref->{$de}{"$keyname-unbz2"} = $dig->hexdigest;
      $zip->bzclose;
    }
  }
}

sub ckcmp ($$) {
  my($old,$new) = @_;
  for ($old,$new) {
    $_ = makehashref($_);
  }
  Data::Compare::Compare($old,$new);
}

# see if a file changed but the name not
sub investigate ($$) {
  my($old,$new) = @_;
  for ($old,$new) {
    $_ = makehashref($_);
  }
  my $complain = "";
  for my $dist (sort keys %$new) {
    if (exists $old->{$dist}) {
      my $headersaid;
      for my $diff (qw/md5 sha256 size md5-ungz sha256-ungz mtime/) {
        next unless exists $old->{$dist}{$diff} &&
            exists $new->{$dist}{$diff};
        next if $old->{$dist}{$diff} eq $new->{$dist}{$diff};
        $complain .=
            scalar gmtime().
                " GMT:\ndiffering old/new version of same file $dist:\n"
                    unless $headersaid++;
        $complain .=
            qq{\t$diff "$old->{$dist}{$diff}" -> "$new->{$dist}{$diff}"\n}; #};
      }
    }
  }
  $complain;
}

sub makehashref ($) {
  local($_) = shift;
  unless (ref $_ eq "HASH") {
    require Safe;
    my($comp) = Safe->new("CPAN::Checksums::reval");
    my $cksum; # used by Data::Dumper
    $_ = $comp->reval($_) || {};
    die "CPAN::Checksums: Caught error[$@] while checking $DIRNAME" if $@;
  }
  $_;
}

1;

__END__

=head1 NAME

CPAN::Checksums - Write a C<CHECKSUMS> file for a directory as on CPAN

=head1 SYNOPSIS

  use CPAN::Checksums qw(updatedir);
  my $success = updatedir($directory);

=head1 INCOMPATIBILITY ALERT

Since version 1.0 the generation of the attribute C<shortname> is
turned off by default. It was too slow and was not used as far as I
know, and above all, it could fail on large directories. The shortname
feature can still be turned on by setting the global variable
$TRY_SHORTNAME to a true value.

=head1 DESCRIPTION

=over 2

=item $success = updatedir($dir)

C<updatedir()> takes a directory name as argument and writes a typical
C<CHECKSUMS> file in that directory as used on CPAN unless a previously
written C<CHECKSUMS> file is there that is still valid. Returns 2 if a
new C<CHECKSUMS> file has been written, 1 if a valid C<CHECKSUMS> file is
already there, otherwise dies.

Note: since version 2.0 updatedir on empty directories behaves just
the same. In older versions it silently did nothing.

=back

=head2 Global Variables in package CPAN::Checksums

=over

=item $IGNORE_MATCH

If the global variable $IGNORE_MATCH is set, then all files matching
this expression will be completely ignored and will not be included in
the CPAN C<CHECKSUMS> files. Per default this variable is set to

    qr{(?i-xsm:readme$)}

=item $CAUTION

Setting the global variable $CAUTION causes updatedir() to report
changes of files in the attributes C<size>, C<mtime>, C<md5>, or
C<md5-ungz> to STDERR.

=item $TRY_SHORTNAME

By setting the global variable $TRY_SHORTNAME to a true value, you can
tell updatedir() to include an attribute C<shortname> in the resulting
hash that is 8.3-compatible. Please note, that updatedir() in this
case may be slow and may even fail on large directories, because it
will always only try 1000 iterations to find a name that is not yet
taken and then give up.

=item $SIGNING_KEY

Setting the global variable $SIGNING_KEY makes the generated C<CHECKSUMS>
file to be clear-signed by the command specified in $SIGNING_PROGRAM
(defaults to C<gpg --clearsign --default-key >), passing the signing
key as an extra argument.  The resulting C<CHECKSUMS> file should look like:

    0&&<<''; # this PGP-signed message is also valid perl
    -----BEGIN PGP SIGNED MESSAGE-----
    Hash: SHA1

    # CHECKSUMS file written on ... by CPAN::Checksums (v...)
    $cksum = {
	...
    };

    __END__
    -----BEGIN PGP SIGNATURE-----
    ...
    -----END PGP SIGNATURE-----

note that the actual data remains intact, but two extra lines are
added to make it legal for both OpenPGP and perl syntax.

=item $MIN_MTIME_CHECKSUMS

If the global variable $MIN_MTIME_CHECKSUMS is set, then updatedir
will renew signatures on checksum files that have an older mtime than
the given value.

=back

=head1 PREREQUISITES

DirHandle, IO::File, Digest::MD5, Digest::SHA, Compress::Bzip2,
Compress::Zlib, File::Spec, Data::Dumper, Data::Compare, File::Temp

=head1 BUGS

If updatedir is interrupted, it may leave a temporary file lying
around. These files have the File::Temp template C<CHECKSUMS.XXXX> and
should be harvested by a cronjob.

=head1 AUTHOR

Andreas Koenig, andreas.koenig@anima.de; GnuPG support by Autrijus Tang

=head1 COPYRIGHT & LICENSE

Copyright (c) 2002-2008 Andreas Koenig, Audrey Tang, Steve Peters.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

perl(1).

=cut