summaryrefslogtreecommitdiff
path: root/cpan/Archive-Tar/bin/ptargrep
blob: 8dc6b4f896f4d65982e329eaf3a8f369fdf071a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/perl
##############################################################################
# Tool for using regular expressions against the contents of files in a tar
# archive.  See 'ptargrep --help' for more documentation.
#

BEGIN { pop @INC if $INC[-1] eq '.' }
use strict;
use warnings;

use Pod::Usage   qw(pod2usage);
use Getopt::Long qw(GetOptions);
use Archive::Tar qw();
use File::Path   qw(mkpath);

my(%opt, $pattern);

if(!GetOptions(\%opt,
    'basename|b',
    'ignore-case|i',
    'list-only|l',
    'verbose|v',
    'help|?',
)) {
    pod2usage(-exitval => 1,  -verbose => 0);
}


pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};

pod2usage(-exitval => 1,  -verbose => 0,
    -message => "No pattern specified",
) unless @ARGV;
make_pattern( shift(@ARGV) );

pod2usage(-exitval => 1,  -verbose => 0,
    -message => "No tar files specified",
) unless @ARGV;

process_archive($_) foreach @ARGV;

exit 0;


sub make_pattern {
    my($pat) = @_;

    if($opt{'ignore-case'}) {
        $pattern = qr{(?im)$pat};
    }
    else {
        $pattern = qr{(?m)$pat};
    }
}


sub process_archive {
    my($filename) = @_;

    _log("Processing archive: $filename");
    my $next = Archive::Tar->iter($filename);
    while( my $f = $next->() ) {
        next unless $f->is_file;
        match_file($f) if $f->size > 0;
    }
}


sub match_file {
    my($f)   = @_;
    my $path = $f->name;
    my $prefix = $f->prefix;
    if (defined $prefix) {
        $path = File::Spec->catfile($prefix, $path);
    }

    _log("filename: %s  (%d bytes)", $path, $f->size);

    my $body = $f->get_content();
    if($body !~ $pattern) {
        _log("  no match");
        return;
    }

    if($opt{'list-only'}) {
        print $path, "\n";
        return;
    }

    save_file($path, $body);
}


sub save_file {
    my($path, $body) = @_;

    _log("  found match - extracting");
    my($fh);
    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
    if($dir and not $opt{basename}) {
        _log("  writing to $dir/$file");
        $dir =~ s{\A/}{./};
        mkpath($dir) unless -d $dir;
        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
    }
    else {
        _log("  writing to ./$file");
        open $fh, '>', $file or die "open($file): $!";
    }
    print $fh $body;
    close($fh);
}


sub _log {
    return unless $opt{verbose};
    my($format, @args) = @_;
    warn sprintf($format, @args) . "\n";
}


__END__

=head1 NAME

ptargrep - Apply pattern matching to the contents of files in a tar archive

=head1 SYNOPSIS

  ptargrep [options] <pattern> <tar file> ...

  Options:

   --basename|-b     ignore directory paths from archive
   --ignore-case|-i  do case-insensitive pattern matching
   --list-only|-l    list matching filenames rather than extracting matches
   --verbose|-v      write debugging message to STDERR
   --help|-?         detailed help message

=head1 DESCRIPTION

This utility allows you to apply pattern matching to B<the contents> of files
contained in a tar archive.  You might use this to identify all files in an
archive which contain lines matching the specified pattern and either print out
the pathnames or extract the files.

The pattern will be used as a Perl regular expression (as opposed to a simple
grep regex).

Multiple tar archive filenames can be specified - they will each be processed
in turn.

=head1 OPTIONS

=over 4

=item B<--basename> (alias -b)

When matching files are extracted, ignore the directory path from the archive
and write to the current directory using the basename of the file from the
archive.  Beware: if two matching files in the archive have the same basename,
the second file extracted will overwrite the first.

=item B<--ignore-case> (alias -i)

Make pattern matching case-insensitive.

=item B<--list-only> (alias -l)

Print the pathname of each matching file from the archive to STDOUT.  Without
this option, the default behaviour is to extract each matching file.

=item B<--verbose> (alias -v)

Log debugging info to STDERR.

=item B<--help> (alias -?)

Display this documentation.

=back

=head1 COPYRIGHT

Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut