summaryrefslogtreecommitdiff
path: root/lib/DBM_Filter.pm
blob: 033fd1c75f4109e2bee482856abba23cdbe5f4c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
package DBM_Filter ;

use strict;
use warnings;
our $VERSION = '0.06';

package Tie::Hash ;

use strict;
use warnings;

use Carp;


our %LayerStack = ();
our %origDESTROY = ();

our %Filters = map { $_, undef } qw(
            Fetch_Key
            Fetch_Value
            Store_Key
            Store_Value
	);

our %Options = map { $_, 1 } qw(
            fetch
            store
	);

#sub Filter_Enable
#{
#}
#
#sub Filter_Disable
#{
#}

sub Filtered
{
    my $this = shift;
    return defined $LayerStack{$this} ;
}

sub Filter_Pop
{
    my $this = shift;
    my $stack = $LayerStack{$this} || return undef ;
    my $filter = pop @{ $stack };

    # remove the filter hooks if this is the last filter to pop
    if ( @{ $stack } == 0 ) {
        $this->filter_store_key  ( undef );
        $this->filter_store_value( undef );
        $this->filter_fetch_key  ( undef );
        $this->filter_fetch_value( undef );
        delete $LayerStack{$this};
    }

    return $filter;
}

sub Filter_Key_Push
{
    &_do_Filter_Push;
}

sub Filter_Value_Push
{
    &_do_Filter_Push;
}


sub Filter_Push
{
    &_do_Filter_Push;
}

sub _do_Filter_Push
{
    my $this = shift;
    my %callbacks = ();
    my $caller = (caller(1))[3];
    $caller =~ s/^.*:://;
 
    croak "$caller: no parameters present" unless @_ ;

    if ( ! $Options{lc $_[0]} ) {
        my $class = shift;
        my @params = @_;

        # if $class already contains "::", don't prefix "DBM_Filter::"
        $class = "DBM_Filter::$class" unless $class =~ /::/;
    
        no strict 'refs';
        # does the "DBM_Filter::$class" exist?
	if ( ! %{ "${class}::"} ) {
	    # Nope, so try to load it.
            eval " require $class ; " ;
            croak "$caller: Cannot Load DBM Filter '$class': $@" if $@;
        }
    
        my $fetch  = *{ "${class}::Fetch"  }{CODE};
        my $store  = *{ "${class}::Store"  }{CODE};
        my $filter = *{ "${class}::Filter" }{CODE};
        use strict 'refs';

        my $count = defined($filter) + defined($store) + defined($fetch) ;

        if ( $count == 0 )
          { croak "$caller: No methods (Filter, Fetch or Store) found in class '$class'" }
        elsif ( $count == 1 && ! defined $filter) {
           my $need = defined($fetch) ? 'Store' : 'Fetch';
           croak "$caller: Missing method '$need' in class '$class'" ;
        }
        elsif ( $count >= 2 && defined $filter)
          { croak "$caller: Can't mix Filter with Store and Fetch in class '$class'" }

        if (defined $filter) {
            my $callbacks = &{ $filter }(@params);
            croak "$caller: '${class}::Filter' did not return a hash reference" 
                unless ref $callbacks && ref $callbacks eq 'HASH';
            %callbacks = %{ $callbacks } ;
        }
        else {
            $callbacks{Fetch} = $fetch;
            $callbacks{Store} = $store;
        }
    }
    else {
        croak "$caller: not even params" unless @_ % 2 == 0;
        %callbacks = @_;
    }
    
    my %filters = %Filters ;
    my @got = ();
    while (my ($k, $v) = each %callbacks )
    {
        my $key = $k;
        $k = lc $k;
        if ($k eq 'fetch') {
            push @got, 'Fetch';
            if ($caller eq 'Filter_Push')
              { $filters{Fetch_Key} = $filters{Fetch_Value} = $v }
            elsif ($caller eq 'Filter_Key_Push')
              { $filters{Fetch_Key} = $v }
            elsif ($caller eq 'Filter_Value_Push')
              { $filters{Fetch_Value} = $v }
        }
        elsif ($k eq 'store') {
            push @got, 'Store';
            if ($caller eq 'Filter_Push')
              { $filters{Store_Key} = $filters{Store_Value} = $v }
            elsif ($caller eq 'Filter_Key_Push')
              { $filters{Store_Key} = $v }
            elsif ($caller eq 'Filter_Value_Push')
              { $filters{Store_Value} = $v }
        }
        else
          { croak "$caller: Unknown key '$key'" }

        croak "$caller: value associated with key '$key' is not a code reference"
            unless ref $v && ref $v eq 'CODE';
    }

    if ( @got != 2 ) {
        push @got, 'neither' if @got == 0 ;
        croak "$caller: expected both Store & Fetch - got @got";
    }

    # remember the class
    push @{ $LayerStack{$this} }, \%filters ;

    my $str_this = "$this" ; # Avoid a closure with $this in the subs below

    $this->filter_store_key  ( sub { store_hook($str_this, 'Store_Key')   });
    $this->filter_store_value( sub { store_hook($str_this, 'Store_Value') });
    $this->filter_fetch_key  ( sub { fetch_hook($str_this, 'Fetch_Key')   });
    $this->filter_fetch_value( sub { fetch_hook($str_this, 'Fetch_Value') });

    # Hijack the callers DESTROY method
    $this =~ /^(.*)=/;
    my $type = $1 ;
    no strict 'refs';
    if ( *{ "${type}::DESTROY" }{CODE} ne \&MyDESTROY )
    {
        $origDESTROY{$type} = *{ "${type}::DESTROY" }{CODE};
        no warnings 'redefine';
        *{ "${type}::DESTROY" } = \&MyDESTROY ;
    }
}

sub store_hook
{
    my $this = shift ;
    my $type = shift ;
    foreach my $layer (@{ $LayerStack{$this} })
    {
        &{ $layer->{$type} }() if defined $layer->{$type} ;
    }
}

sub fetch_hook
{
    my $this = shift ;
    my $type = shift ;
    foreach my $layer (reverse @{ $LayerStack{$this} })
    {
        &{ $layer->{$type} }() if defined $layer->{$type} ;
    }
}

sub MyDESTROY
{
    my $this = shift ;
    delete $LayerStack{$this} ;

    # call real DESTROY
    $this =~ /^(.*)=/;
    &{ $origDESTROY{$1} }($this);
}

1;

__END__

=head1 NAME

DBM_Filter -- Filter DBM keys/values 

=head1 SYNOPSIS

    use DBM_Filter ;
    use SDBM_File; # or DB_File, GDBM_File, NDBM_File, or ODBM_File

    $db = tie %hash, ...

    $db->Filter_Push(Fetch => sub {...},
                     Store => sub {...});

    $db->Filter_Push('my_filter1');
    $db->Filter_Push('my_filter2', params...);

    $db->Filter_Key_Push(...) ;
    $db->Filter_Value_Push(...) ;

    $db->Filter_Pop();
    $db->Filtered();

    package DBM_Filter::my_filter1;

    sub Store { ... }
    sub Fetch { ... }

    1;

    package DBM_Filter::my_filter2;

    sub Filter
    {
        my @opts = @_;
        ...
        return (
            sub Store { ... },
            sub Fetch { ... } );
    }

    1;

=head1 DESCRIPTION

This module provides an interface that allows filters to be applied
to tied Hashes associated with DBM files. It builds on the DBM Filter
hooks that are present in all the *DB*_File modules included with the
standard Perl source distribution from version 5.6.1 onwards. In addition
to the *DB*_File modules distributed with Perl, the BerkeleyDB module,
available on CPAN, supports the DBM Filter hooks. See L<perldbmfilter>
for more details on the DBM Filter hooks.

=head1 What is a DBM Filter?

A DBM Filter allows the keys and/or values in a tied hash to be modified
by some user-defined code just before it is written to the DBM file and
just after it is read back from the DBM file. For example, this snippet
of code

    $some_hash{"abc"} = 42;

could potentially trigger two filters, one for the writing of the key
"abc" and another for writing the value 42.  Similarly, this snippet

    my ($key, $value) = each %some_hash

will trigger two filters, one for the reading of the key and one for
the reading of the value.

Like the existing DBM Filter functionality, this module arranges for the
C<$_> variable to be populated with the key or value that a filter will
check. This usually means that most DBM filters tend to be very short.

=head2 So what's new?

The main enhancements over the standard DBM Filter hooks are:

=over 4

=item *

A cleaner interface.

=item *

The ability to easily apply multiple filters to a single DBM file.

=item *

The ability to create "canned" filters. These allow commonly used filters
to be packaged into a stand-alone module.

=back

=head1 METHODS

This module will arrange for the following methods to be available via
the object returned from the C<tie> call.

=head2 $db->Filter_Push() / $db->Filter_Key_Push() / $db->Filter_Value_Push()

Add a filter to filter stack for the database, C<$db>. The three formats
vary only in whether they apply to the DBM key, the DBM value or both.

=over 5

=item Filter_Push

The filter is applied to I<both> keys and values.

=item Filter_Key_Push

The filter is applied to the key I<only>.

=item Filter_Value_Push

The filter is applied to the value I<only>.

=back


=head2 $db->Filter_Pop()

Removes the last filter that was applied to the DBM file associated with
C<$db>, if present.

=head2 $db->Filtered()

Returns TRUE if there are any filters applied to the DBM associated
with C<$db>.  Otherwise returns FALSE.



=head1 Writing a Filter

Filters can be created in two main ways

=head2 Immediate Filters

An immediate filter allows you to specify the filter code to be used
at the point where the filter is applied to a dbm. In this mode the
Filter_*_Push methods expects to receive exactly two parameters.

    my $db = tie %hash, 'SDBM_File', ...
    $db->Filter_Push( Store => sub { },
                      Fetch => sub { });

The code reference associated with C<Store> will be called before any
key/value is written to the database and the code reference associated
with C<Fetch> will be called after any key/value is read from the
database.

For example, here is a sample filter that adds a trailing NULL character
to all strings before they are written to the DBM file, and removes the
trailing NULL when they are read from the DBM file

    my $db = tie %hash, 'SDBM_File', ...
    $db->Filter_Push( Store => sub { $_ .= "\x00" ; },
                      Fetch => sub { s/\x00$// ;    });


Points to note:

=over 5

=item 1.

Both the Store and Fetch filters manipulate C<$_>.

=back

=head2 Canned Filters

Immediate filters are useful for one-off situations. For more generic
problems it can be useful to package the filter up in its own module.

The usage is for a canned filter is:

    $db->Filter_Push("name", params)

where

=over 5

=item "name"

is the name of the module to load. If the string specified does not
contain the package separator characters "::", it is assumed to refer to
the full module name "DBM_Filter::name". This means that the full names
for canned filters, "null" and "utf8", included with this module are:

    DBM_Filter::null
    DBM_Filter::utf8

=item params

any optional parameters that need to be sent to the filter. See the
encode filter for an example of a module that uses parameters.

=back

The module that implements the canned filter can take one of two
forms. Here is a template for the first

    package DBM_Filter::null ;

    use strict;
    use warnings;

    sub Store 
    {
        # store code here    
    }

    sub Fetch
    {
        # fetch code here
    }

    1;


Notes:

=over 5

=item 1.

The package name uses the C<DBM_Filter::> prefix.

=item 2.

The module I<must> have both a Store and a Fetch method. If only one is
present, or neither are present, a fatal error will be thrown.

=back

The second form allows the filter to hold state information using a
closure, thus:

    package DBM_Filter::encoding ;

    use strict;
    use warnings;

    sub Filter
    {
        my @params = @_ ;

        ...
        return {
            Store   => sub { $_ = $encoding->encode($_) },
            Fetch   => sub { $_ = $encoding->decode($_) }
            } ;
    }

    1;


In this instance the "Store" and "Fetch" methods are encapsulated inside a
"Filter" method.


=head1 Filters Included

A number of canned filers are provided with this module. They cover a
number of the main areas that filters are needed when interfacing with
DBM files. They also act as templates for your own filters.

The filter included are:

=over 5

=item * utf8

This module will ensure that all data written to the DBM will be encoded
in UTF-8.

This module needs the Encode module.

=item * encode

Allows you to choose the character encoding will be store in the DBM file.

=item * compress

This filter will compress all data before it is written to the database
and uncompressed it on reading.

This module needs Compress::Zlib. 

=item * int32

This module is used when interoperating with a C/C++ application that
uses a C int as either the key and/or value in the DBM file.

=item * null

This module ensures that all data written to the DBM file is null
terminated. This is useful when you have a perl script that needs
to interoperate with a DBM file that a C program also uses. A fairly
common issue is for the C application to include the terminating null
in a string when it writes to the DBM file. This filter will ensure that
all data written to the DBM file can be read by the C application.

=back

=head1 NOTES

=head2 Maintain Round Trip Integrity

When writing a DBM filter it is I<very> important to ensure that it is
possible to retrieve all data that you have written when the DBM filter
is in place. In practice, this means that whatever transformation is
applied to the data in the Store method, the I<exact> inverse operation
should be applied in the Fetch method.

If you don't provide an exact inverse transformation, you will find that
code like this will not behave as you expect.

     while (my ($k, $v) = each %hash)
     {
         ...
     }

Depending on the transformation, you will find that one or more of the
following will happen

=over 5

=item 1

The loop will never terminate.

=item 2

Too few records will be retrieved.

=item 3

Too many will be retrieved.

=item 4

The loop will do the right thing for a while, but it will unexpectedly fail. 

=back

=head2 Don't mix filtered & non-filtered data in the same database file. 

This is just a restatement of the previous section. Unless you are
completely certain you know what you are doing, avoid mixing filtered &
non-filtered data.

=head1 EXAMPLE

Say you need to interoperate with a legacy C application that stores
keys as C ints and the values and null terminated UTF-8 strings. Here
is how you would set that up

    my $db = tie %hash, 'SDBM_File', ...

    $db->Filter_Key_Push('int32') ;

    $db->Filter_Value_Push('utf8');
    $db->Filter_Value_Push('null');

=head1 SEE ALSO

<DB_File>,  L<GDBM_File>, L<NDBM_File>, L<ODBM_File>, L<SDBM_File>, L<perldbmfilter>

=head1 AUTHOR

Paul Marquess <pmqs@cpan.org>