summaryrefslogtreecommitdiff
path: root/lib/Archive/Zip/MemberRead.pm
blob: e0f0b14d88cc722152f6124c86ee82e4fad68713 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
package Archive::Zip::MemberRead;

=head1 NAME

Archive::Zip::MemberRead - A wrapper that lets you read Zip archive members as if they were files.

=cut

=head1 SYNOPSIS

  use Archive::Zip;
  use Archive::Zip::MemberRead;
  $zip = Archive::Zip->new("file.zip");
  $fh  = Archive::Zip::MemberRead->new($zip, "subdir/abc.txt");
  while (defined($line = $fh->getline()))
  {
      print $fh->input_line_number . "#: $line\n";
  }

  $read = $fh->read($buffer, 32*1024);
  print "Read $read bytes as :$buffer:\n";

=head1 DESCRIPTION

The Archive::Zip::MemberRead module lets you read Zip archive member data
just like you read data from files.

=head1 METHODS

=over 4

=cut

use strict;

use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

use vars qw{$VERSION};

my $nl;

BEGIN {
    # Start from the release string, then evaluate it as Perl source so
    # that $VERSION ends up holding the resulting numeric value.
    $VERSION = '1.49';
    $VERSION = eval $VERSION;

    # Line ending substituted for "\n" when separator patterns are built.
    # Only native Windows is detected; other CRLF platforms (e.g. DOS and
    # OS/2) are not checked here and have to call setLineEnd() themselves.
    if ($^O eq 'MSWin32') {
        $nl = "\r\n";
    } else {
        $nl = "\n";
    }
}

=item Archive::Zip::Member::readFileHandle()

You can get a C<Archive::Zip::MemberRead> from an archive member by
calling C<readFileHandle()>:

  my $member = $zip->memberNamed('abc/def.c');
  my $fh = $member->readFileHandle();
  while (defined($line = $fh->getline()))
  {
      # ...
  }
  $fh->close();

=cut

# Defined in Archive::Zip::Member's namespace so that a member object can
# hand out a reader for itself. Only the invocant is forwarded to new();
# any further arguments are ignored.
sub Archive::Zip::Member::readFileHandle {
    my ($member) = @_;
    return Archive::Zip::MemberRead->new($member);
}

=item Archive::Zip::MemberRead->new($zip, $fileName)

=item Archive::Zip::MemberRead->new($zip, $member)

=item Archive::Zip::MemberRead->new($member)

Construct a new Archive::Zip::MemberRead on the specified member.

  my $fh = Archive::Zip::MemberRead->new($zip, 'fred.c')

=cut

# Constructor. Accepts (zip, member name), (zip, member object) or just a
# member object, resolves the member and attaches it via set_member().
# Dies when none of those argument shapes is recognised.
sub new {
    my ($class, $zip, $file) = @_;

    # A plain truth test is wrong for $file: a member may legitimately be
    # named "0". Treat any reference or any non-empty string as "given".
    my $have_file = defined($file) && (ref($file) || length($file));

    my $member;
    if ($zip && $have_file) {

        # zip and member object, or zip and member name
        $member = ref($file) ? $file : $zip->memberNamed($file);
    } elsif (!$have_file && ref($zip)) {

        # just a member, passed in the $zip slot
        $member = $zip;
    } else {
        die(
            'Archive::Zip::MemberRead::new needs a zip and filename, zip and member, or member'
        );
    }

    my $self = bless({}, $class);
    $self->set_member($member);
    return $self;
}

# Attach $member to this reader, request COMPRESSION_STORED as the member's
# desired compression method and rewind. Returns whatever rewind() returns.
sub set_member {
    my $self = shift;

    $self->{member} = shift;

    # NOTE(review): presumably STORED is requested so that reads yield
    # uncompressed data - confirm against Archive::Zip's documentation.
    $self->set_compression(COMPRESSION_STORED);

    return $self->rewind();
}

# Forward $compression to the member's desiredCompressionMethod().
# Does nothing (and returns the false member slot) when no member is set.
sub set_compression {
    my $self        = shift;
    my $compression = shift;

    my $member = $self->{member};
    return $member && $member->desiredCompressionMethod($compression);
}

=item setLineEnd(expr)

Set the line ending to use. The default is \n, except on Windows systems,
where it is \r\n. You only need to set this on systems that are neither
Windows nor Unix based and that require a line ending other than \n.
This is a class method, so call it as
C<< Archive::Zip::MemberRead->setLineEnd($nl) >>.

=cut

# Class method: replace the file-wide line ending. The invocant is ignored.
# Returns the new value.
sub setLineEnd {
    my ($class, $line_end) = @_;
    return $nl = $line_end;
}

=item rewind()

Rewinds an C<Archive::Zip::MemberRead> so that you can read from it again
starting at the beginning.

=cut

# Reset the per-read state, then ask the member (if any) to rewind its data.
sub rewind {
    my ($self) = @_;

    $self->_reset_vars();

    my $member = $self->{member};
    return $member && $member->rewindData();
}

# Put the reader back into its initial state: line counter and end-of-data
# flag zeroed, pending buffer discarded. Returns the discarded buffer.
sub _reset_vars {
    my ($self) = @_;

    @{$self}{qw(line_no at_end)} = (0, 0);

    return delete $self->{buffer};
}

=item input_record_separator(expr)

If the argument is given, input_record_separator for this
instance is set to it. The current setting (which may be
the global $/) is always returned.

=cut

# Combined getter/setter. With an argument, store it as this instance's
# separator together with its pattern form. Always returns the separator
# in effect: the instance one if it was ever set (even to undef), else $/.
sub input_record_separator {
    my ($self, @args) = @_;

    if (@args) {
        my $separator = $args[0];
        $self->{sep}    = $separator;
        $self->{sep_re} = _sep_as_re($separator);  # cached as an optimization
    }

    return $/ unless exists $self->{sep};
    return $self->{sep};
}

# Give back the record separator in effect as a pattern fragment.
# A per-instance separator was converted when it was set, so its cached
# form is returned as is. The global $/ is converted on every call,
# because it may have changed since the last one.
sub _sep_re {
    my ($self) = @_;

    # Test with exists, not defined: the instance separator may be undef.
    return _sep_as_re($/) unless exists $self->{sep};
    return $self->{sep_re};
}

# Turn a record separator into a pattern fragment:
#   undef       -> undef
#   ''          -> a run of two or more line endings
#   other       -> the separator with "\n" replaced by the configured line
#                  ending, then quotemeta'd
sub _sep_as_re {
    my ($separator) = @_;

    return undef unless defined $separator;
    return "(?:$nl){2,}" if $separator eq '';

    $separator =~ s/\n/$nl/og;
    return quotemeta $separator;
}

=item input_line_number()

Returns the current line number, but only if you're using C<getline()>.
Using C<read()> will not update the line number.

=cut

# Accessor for the line counter maintained by getline().
sub input_line_number {
    my ($self) = @_;
    return $self->{line_no};
}

=item close()

Closes the given file handle.

=cut

# Reset the per-read state and end the read on the member.
# Returns the result of the member's endRead(), or nothing when this
# reader has no member.
sub close {
    my ($self) = @_;

    $self->_reset_vars();

    # Guard against a missing member, as rewind() and set_compression()
    # do; without it, closing a member-less reader would die on the
    # method call.
    my $member = $self->{member};
    return unless $member;

    return $member->endRead();
}

=item buffer_size([ $size ])

Gets or sets the buffer size used for reads.
Default is the chunk size used by Archive::Zip.

=cut

# Combined getter/setter for the chunk size used when reading.
# A true $size is stored and returned. Otherwise the stored size is
# returned, falling back to Archive::Zip::chunkSize() when none is stored.
sub buffer_size {
    my $self = shift;
    my $size = shift;

    return $self->{chunkSize} = $size if $size;
    return $self->{chunkSize} || Archive::Zip::chunkSize();
}

=item getline()

Returns the next line from the currently open member.
Makes sense only for text files.
A read error is considered fatal enough to die.
Returns undef on eof. All subsequent calls would return undef,
unless a rewind() is called.
Note: The line returned has the input_record_separator (default: newline) removed.

=item getline( { preserve_line_ending => 1 } )

Returns the next line including the line ending.

=cut

# Return the next record from the member, or undef once the data is
# exhausted. Dies if the member reports a read error.
#
#   $argref - optional hash ref; a true 'preserve_line_ending' keeps the
#             separator text that ended the record on the returned string.
#
# Data is pulled from the member in buffer_size() chunks and accumulated in
# $self->{buffer} until a separator is found or the member signals its end.
sub getline {
    my ($self, $argref) = @_;

    my $size   = $self->buffer_size();
    my $sep_re = $self->_sep_re();

    my $preserve_line_ending =
      ref($argref) eq 'HASH' && $argref->{'preserve_line_ending'};

    # The separator pattern is tested for definedness/length rather than
    # truth, so that a separator of "0" still splits records. An undefined
    # pattern means "no separator": everything is returned in one piece.
    my $have_sep = defined($sep_re) && length($sep_re);

    while (1) {
        if (   $have_sep
            && defined($self->{buffer})
            && $self->{buffer} =~ s/^(.*?)($sep_re)//s) {

            # Capture the text the separator actually matched instead of
            # trying to turn the pattern back into a string: that keeps
            # multi-character separators, metacharacters and paragraph
            # mode intact when the line ending is preserved.
            my ($line, $ending) = ($1, $2);
            $self->{line_no}++;
            return $preserve_line_ending ? $line . $ending : $line;
        } elsif ($self->{at_end}) {

            # Whatever is left is the final, unterminated record. An empty
            # remainder is end-of-data, not an extra empty line; a
            # remainder of "0" is a real line and is counted.
            my $rest = delete $self->{buffer};
            $rest = undef unless defined($rest) && length($rest);
            $self->{line_no}++ if defined $rest;
            return $rest;
        }

        my ($temp, $status) = $self->{member}->readChunk($size);
        if ($status != AZ_OK && $status != AZ_STREAM_END) {
            die "ERROR: Error reading chunk from archive - $status";
        }
        $self->{at_end} = $status == AZ_STREAM_END;
        $self->{buffer} .= $$temp;
    }
}

=item read($buffer, $num_bytes_to_read)

Simulates a normal C<read()> system call.
Returns the number of bytes read, C<undef> on error, or 0 on eof, I<e.g.>:

  $fh = Archive::Zip::MemberRead->new($zip, "sreeji/secrets.bin");
  while (1)
  {
    $read = $fh->read($buffer, 1024);
    die "FATAL ERROR reading my secrets !\n" if (!defined($read));
    last if (!$read);
    # Do processing.
    ....
   }

=cut

#
# Working on the elements of @_ directly (rather than on copies) is required
# to emulate read(): the caller's buffer is filled through $_[1].
#
# Read one chunk of up to $_[2] bytes (as requested from the member's
# readChunk()) into the caller's buffer variable.
# Returns the length of the data stored, or undef on a read error, in
# which case the caller's buffer is set to undef as well.
sub read {

    # $_[1] is deliberately never copied into a lexical: assigning to the
    # @_ element writes straight into the caller's variable.
    my ($self, $size) = @_[0, 2];

    my ($chunk_ref, $status) = $self->{member}->readChunk($size);

    my $ok = $status == AZ_OK || $status == AZ_STREAM_END;

    $_[1] = $ok ? $$chunk_ref : undef;
    return $ok ? length($$chunk_ref) : undef;
}

1;

=back

=head1 AUTHOR

Sreeji K. Das E<lt>sreeji_k@yahoo.comE<gt>

See L<Archive::Zip> by Ned Konz, without which this module does not make
any sense!

Minor mods by Ned Konz.

=head1 COPYRIGHT

Copyright 2002 Sreeji K. Das.

This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=cut