diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-06-16 21:47:00 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-06-16 21:47:00 +0000 |
commit | a0cb39004565ec2396fbdb3f1949b8f13304208e (patch) | |
tree | 67b23b5671a1bf84313263478ddd1c4894a7b7ad | |
parent | 58a21a9b07f5f6666d09bb8c0b9bf9150baca513 (diff) | |
download | perl-a0cb39004565ec2396fbdb3f1949b8f13304208e.tar.gz |
Integrate Memoize 0.64. Few tweaks were required in
the test scripts. Note that the speed and expire*
tests take several dozen seconds to run.
p4raw-id: //depot/perl@10645
31 files changed, 4101 insertions, 0 deletions
@@ -848,6 +848,35 @@ lib/Math/BigFloat.pm An arbitrary precision floating-point arithmetic package lib/Math/BigInt.pm An arbitrary precision integer arithmetic package lib/Math/Complex.pm A Complex package lib/Math/Trig.pm A simple interface to complex trigonometry +lib/Memoize.pm Memoize +lib/Memoize/AnyDBM_File.pm Memoize +lib/Memoize/Expire.pm Memoize +lib/Memoize/ExpireFile.pm Memoize +lib/Memoize/ExpireTest.pm Memoize +lib/Memoize/NDBM_File.pm Memoize +lib/Memoize/README Memoize +lib/Memoize/SDBM_File.pm Memoize +lib/Memoize/Saves.pm Memoize +lib/Memoize/Storable.pm Memoize +lib/Memoize/TODO Memoize +lib/Memoize/t/array.t Memoize +lib/Memoize/t/correctness.t Memoize +lib/Memoize/t/errors.t Memoize +lib/Memoize/t/expire.t Memoize +lib/Memoize/t/expire_file.t Memoize +lib/Memoize/t/expire_module_n.t Memoize +lib/Memoize/t/expire_module_t.t Memoize +lib/Memoize/t/flush.t Memoize +lib/Memoize/t/normalize.t Memoize +lib/Memoize/t/prototype.t Memoize +lib/Memoize/t/speed.t Memoize +lib/Memoize/t/tie.t Memoize +lib/Memoize/t/tie_gdbm.t Memoize +lib/Memoize/t/tie_ndbm.t Memoize +lib/Memoize/t/tie_sdbm.t Memoize +lib/Memoize/t/tie_storable.t Memoize +lib/Memoize/t/tiefeatures.t Memoize +lib/Memoize/t/unmemoize.t Memoize lib/NEXT.pm Pseudo-class NEXT for method redispatch lib/Net/Ping.pm Hello, anybody home? lib/Net/hostent.pm By-name interface to Perl's builtin gethost* diff --git a/lib/Memoize.pm b/lib/Memoize.pm new file mode 100644 index 0000000000..5ec4e9126c --- /dev/null +++ b/lib/Memoize.pm @@ -0,0 +1,1029 @@ +# -*- mode: perl; perl-indent-level: 2; -*- +# Memoize.pm +# +# Transparent memoization of idempotent functions +# +# Copyright 1998, 1999 M-J. Dominus. +# You may copy and distribute this program under the +# same terms as Perl itself. If in doubt, +# write to mjd-perl-memoize+@plover.com for a license. +# +# Version 0.64 beta $Revision: 1.17 $ $Date: 2000/10/24 04:33:49 $ + +package Memoize; +$VERSION = '0.64'; + +# Compile-time constants +sub SCALAR () { 0 } +sub LIST () { 1 } + + +# +# Usage memoize(functionname/ref, +# { NORMALIZER => coderef, INSTALL => name, +# LIST_CACHE => descriptor, SCALAR_CACHE => descriptor } +# + +use Carp; +use Exporter; +use vars qw($DEBUG); +@ISA = qw(Exporter); +@EXPORT = qw(memoize); +@EXPORT_OK = qw(unmemoize flush_cache); +use strict; + +my %memotable; +my %revmemotable; +my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH); +my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS; + +# Raise an error if the user tries to specify one of thesepackage as a +# tie for LIST_CACHE + +my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File); + +sub memoize { + my $fn = shift; + my %options = @_; + my $options = \%options; + + unless (defined($fn) && + (ref $fn eq 'CODE' || ref $fn eq '')) { + croak "Usage: memoize 'functionname'|coderef {OPTIONS}"; + } + + my $uppack = caller; # TCL me Elmo! + my $cref; # Code reference to original function + my $name = (ref $fn ? undef : $fn); + + # Convert function names to code references + $cref = &_make_cref($fn, $uppack); + + # Locate function prototype, if any + my $proto = prototype $cref; + if (defined $proto) { $proto = "($proto)" } + else { $proto = "" } + + # Goto considered harmful! Hee hee hee. + my $wrapper = eval "sub $proto { unshift \@_, qq{$cref}; goto &_memoizer; }"; + # Actually I would like to get rid of the eval, but there seems not + # to be any other way to set the prototype properly. + +# --- THREADED PERL COMMENT --- +# The above line might not work under threaded perl because goto & +# semantics are broken. If that's the case, try the following instead: +# my $wrapper = eval "sub { &_memoizer(qq{$cref}, \@_); }"; +# Confirmed 1998-12-27 this does work. +# 1998-12-29: Sarathy says this bug is fixed in 5.005_54. +# However, the module still fails, although the sample test program doesn't. + + my $normalizer = $options{NORMALIZER}; + if (defined $normalizer && ! ref $normalizer) { + $normalizer = _make_cref($normalizer, $uppack); + } + + my $install_name; + if (defined $options->{INSTALL}) { + # INSTALL => name + $install_name = $options->{INSTALL}; + } elsif (! exists $options->{INSTALL}) { + # No INSTALL option provided; use original name if possible + $install_name = $name; + } else { + # INSTALL => undef means don't install + } + + if (defined $install_name) { + $install_name = $uppack . '::' . $install_name + unless $install_name =~ /::/; + no strict; + local($^W) = 0; # ``Subroutine $install_name redefined at ...'' + *{$install_name} = $wrapper; # Install memoized version + } + + $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key + + # These will be the caches + my %caches; + for my $context (qw(SCALAR LIST)) { + # suppress subsequent 'uninitialized value' warnings + $options{"${context}_CACHE"} ||= ''; + + my $cache_opt = $options{"${context}_CACHE"}; + my @cache_opt_args; + if (ref $cache_opt) { + @cache_opt_args = @$cache_opt; + $cache_opt = shift @cache_opt_args; + } + if ($cache_opt eq 'FAULT') { # no cache + $caches{$context} = undef; + } elsif ($cache_opt eq 'HASH') { # user-supplied hash + $caches{$context} = $cache_opt_args[0]; + } elsif ($cache_opt eq '' || $IS_CACHE_TAG{$cache_opt}) { + # default is that we make up an in-memory hash + $caches{$context} = {}; + # (this might get tied later, or MERGEd away) + } else { + croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting"; + } + } + + # Perhaps I should check here that you didn't supply *both* merge + # options. But if you did, it does do something reasonable: They + # both get merged to the same in-memory hash. + if ($options{SCALAR_CACHE} eq 'MERGE') { + $caches{SCALAR} = $caches{LIST}; + } elsif ($options{LIST_CACHE} eq 'MERGE') { + $caches{LIST} = $caches{SCALAR}; + } + + # Now deal with the TIE options + { + my $context; + foreach $context (qw(SCALAR LIST)) { + # If the relevant option wasn't `TIE', this call does nothing. + _my_tie($context, $caches{$context}, $options); # Croaks on failure + } + } + + # We should put some more stuff in here eventually. + # We've been saying that for serveral versions now. + # And you know what? More stuff keeps going in! + $memotable{$cref} = + { + O => $options, # Short keys here for things we need to access frequently + N => $normalizer, + U => $cref, + MEMOIZED => $wrapper, + PACKAGE => $uppack, + NAME => $install_name, + S => $caches{SCALAR}, + L => $caches{LIST}, + }; + + $wrapper # Return just memoized version +} + +# This function tries to load a tied hash class and tie the hash to it. +sub _my_tie { + my ($context, $hash, $options) = @_; + my $fullopt = $options->{"${context}_CACHE"}; + + # We already checked to make sure that this works. + my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt; + + return unless defined $shortopt && $shortopt eq 'TIE'; + + my @args = ref $fullopt ? @$fullopt : (); + shift @args; + my $module = shift @args; + if ($context eq 'LIST' && $scalar_only{$module}) { + croak("You can't use $module for LIST_CACHE because it can only store scalars"); + } + my $modulefile = $module . '.pm'; + $modulefile =~ s{::}{/}g; + eval { require $modulefile }; + if ($@) { + croak "Memoize: Couldn't load hash tie module `$module': $@; aborting"; + } +# eval { import $module }; +# if ($@) { +# croak "Memoize: Couldn't import hash tie module `$module': $@; aborting"; +# } +# eval "use $module ()"; +# if ($@) { +# croak "Memoize: Couldn't use hash tie module `$module': $@; aborting"; +# } + my $rc = (tie %$hash => $module, @args); + unless ($rc) { + croak "Memoize: Couldn't tie hash to `$module': $@; aborting"; + } + 1; +} + +sub flush_cache { + my $func = _make_cref($_[0], scalar caller); + my $info = $memotable{$revmemotable{$func}}; + die "$func not memoized" unless defined $info; + for my $context (qw(S L)) { + my $cache = $info->{$context}; + if (tied %$cache && ! (tied %$cache)->can('CLEAR')) { + my $funcname = defined($info->{NAME}) ? + "function $info->{NAME}" : "anonymous function $func"; + my $context = {S => 'scalar', L => 'list'}->{$context}; + croak "Tied cache hash for $context-context $funcname does not support flushing"; + } else { + %$cache = (); + } + } +} + +# This is the function that manages the memo tables. +sub _memoizer { + my $orig = shift; # stringized version of ref to original func. + my $info = $memotable{$orig}; + my $normalizer = $info->{N}; + + my $argstr; + my $context = (wantarray() ? LIST : SCALAR); + + if (defined $normalizer) { + no strict; + if ($context == SCALAR) { + $argstr = &{$normalizer}(@_); + } elsif ($context == LIST) { + ($argstr) = &{$normalizer}(@_); + } else { + croak "Internal error \#41; context was neither LIST nor SCALAR\n"; + } + } else { # Default normalizer + $argstr = join $;,@_; # $;,@_;? Perl is great. + } + + if ($context == SCALAR) { + my $cache = $info->{S}; + _crap_out($info->{NAME}, 'scalar') unless defined $cache; + if (exists $cache->{$argstr}) { + return $cache->{$argstr}; + } else { + my $val = &{$info->{U}}(@_); + # Scalars are considered to be lists; store appropriately + if ($info->{O}{SCALAR_CACHE} eq 'MERGE') { + $cache->{$argstr} = [$val]; + } else { + $cache->{$argstr} = $val; + } + $val; + } + } elsif ($context == LIST) { + my $cache = $info->{L}; + _crap_out($info->{NAME}, 'list') unless defined $cache; + if (exists $cache->{$argstr}) { + my $val = $cache->{$argstr}; + return ($val) unless ref $val eq 'ARRAY'; + # An array ref is ambiguous. Did the function really return + # an array ref? Or did we cache a list-context list return in + # an anonymous array? + # If LISTCONTEXT=>MERGE, then the function never returns lists, + # so we know for sure: + return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE'; + # Otherwise, we're doomed. ###BUG + return @$val; + } else { + my $q = $cache->{$argstr} = [&{$info->{U}}(@_)]; + @$q; + } + } else { + croak "Internal error \#42; context was neither LIST nor SCALAR\n"; + } +} + +sub unmemoize { + my $f = shift; + my $uppack = caller; + my $cref = _make_cref($f, $uppack); + + unless (exists $revmemotable{$cref}) { + croak "Could not unmemoize function `$f', because it was not memoized to begin with"; + } + + my $tabent = $memotable{$revmemotable{$cref}}; + unless (defined $tabent) { + croak "Could not figure out how to unmemoize function `$f'"; + } + my $name = $tabent->{NAME}; + if (defined $name) { + no strict; + local($^W) = 0; # ``Subroutine $install_name redefined at ...'' + *{$name} = $tabent->{U}; # Replace with original function + } + undef $memotable{$revmemotable{$cref}}; + undef $revmemotable{$cref}; + + # This removes the last reference to the (possibly tied) memo tables + # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'}; + # undef $tabent; + +# # Untie the memo tables if they were tied. +# my $i; +# for $i (0,1) { +# if (tied %{$memotabs->[$i]}) { +# warn "Untying hash #$i\n"; +# untie %{$memotabs->[$i]}; +# } +# } + + $tabent->{U}; +} + +sub _make_cref { + my $fn = shift; + my $uppack = shift; + my $cref; + my $name; + + if (ref $fn eq 'CODE') { + $cref = $fn; + } elsif (! ref $fn) { + if ($fn =~ /::/) { + $name = $fn; + } else { + $name = $uppack . '::' . $fn; + } + no strict; + if (defined $name and !defined(&$name)) { + croak "Cannot operate on nonexistent function `$fn'"; + } +# $cref = \&$name; + $cref = *{$name}{CODE}; + } else { + my $parent = (caller(1))[3]; # Function that called _make_cref + croak "Usage: argument 1 to `$parent' must be a function name or reference.\n"; + } + $DEBUG and warn "${name}($fn) => $cref in _make_cref\n"; + $cref; +} + +sub _crap_out { + my ($funcname, $context) = @_; + if (defined $funcname) { + croak "Function `$funcname' called in forbidden $context context; faulting"; + } else { + croak "Anonymous function called in forbidden $context context; faulting"; + } +} + +1; + + + + + +=head1 NAME + +Memoize - Make your functions faster by trading space for time + +=head1 SYNOPSIS + + use Memoize; + memoize('slow_function'); + slow_function(arguments); # Is faster than it was before + + +This is normally all you need to know. However, many options are available: + + memoize(function, options...); + +Options include: + + NORMALIZER => function + INSTALL => new_name + + SCALAR_CACHE => 'MEMORY' + SCALAR_CACHE => ['HASH', \%cache_hash ] + SCALAR_CACHE => 'FAULT' + SCALAR_CACHE => 'MERGE' + + LIST_CACHE => 'MEMORY' + LIST_CACHE => ['HASH', \%cache_hash ] + LIST_CACHE => 'FAULT' + LIST_CACHE => 'MERGE' + +=head1 DESCRIPTION + +`Memoizing' a function makes it faster by trading space for time. It +does this by caching the return values of the function in a table. +If you call the function again with the same arguments, C<memoize> +jmups in and gives you the value out of the table, instead of letting +the function compute the value all over again. + +Here is an extreme example. Consider the Fibonacci sequence, defined +by the following function: + + # Compute Fibonacci numbers + sub fib { + my $n = shift; + return $n if $n < 2; + fib($n-1) + fib($n-2); + } + +This function is very slow. Why? To compute fib(14), it first wants +to compute fib(13) and fib(12), and add the results. But to compute +fib(13), it first has to compute fib(12) and fib(11), and then it +comes back and computes fib(12) all over again even though the answer +is the same. And both of the times that it wants to compute fib(12), +it has to compute fib(11) from scratch, and then it has to do it +again each time it wants to compute fib(13). This function does so +much recomputing of old results that it takes a really long time to +run---fib(14) makes 1,200 extra recursive calls to itself, to compute +and recompute things that it already computed. + +This function is a good candidate for memoization. If you memoize the +`fib' function above, it will compute fib(14) exactly once, the first +time it needs to, and then save the result in a table. Then if you +ask for fib(14) again, it gives you the result out of the table. +While computing fib(14), instead of computing fib(12) twice, it does +it once; the second time it needs the value it gets it from the table. +It doesn't compute fib(11) four times; it computes it once, getting it +from the table the next three times. Instead of making 1,200 +recursive calls to `fib', it makes 15. This makes the function about +150 times faster. + +You could do the memoization yourself, by rewriting the function, like +this: + + # Compute Fibonacci numbers, memoized version + { my @fib; + sub fib { + my $n = shift; + return $fib[$n] if defined $fib[$n]; + return $fib[$n] = $n if $n < 2; + $fib[$n] = fib($n-1) + fib($n-2); + } + } + +Or you could use this module, like this: + + use Memoize; + memoize('fib'); + + # Rest of the fib function just like the original version. + +This makes it easy to turn memoizing on and off. + +Here's an even simpler example: I wrote a simple ray tracer; the +program would look in a certain direction, figure out what it was +looking at, and then convert the `color' value (typically a string +like `red') of that object to a red, green, and blue pixel value, like +this: + + for ($direction = 0; $direction < 300; $direction++) { + # Figure out which object is in direction $direction + $color = $object->{color}; + ($r, $g, $b) = @{&ColorToRGB($color)}; + ... + } + +Since there are relatively few objects in a picture, there are only a +few colors, which get looked up over and over again. Memoizing +C<ColorToRGB> speeded up the program by several percent. + +=head1 DETAILS + +This module exports exactly one function, C<memoize>. The rest of the +functions in this package are None of Your Business. + +You should say + + memoize(function) + +where C<function> is the name of the function you want to memoize, or +a reference to it. C<memoize> returns a reference to the new, +memoized version of the function, or C<undef> on a non-fatal error. +At present, there are no non-fatal errors, but there might be some in +the future. + +If C<function> was the name of a function, then C<memoize> hides the +old version and installs the new memoized version under the old name, +so that C<&function(...)> actually invokes the memoized version. + +=head1 OPTIONS + +There are some optional options you can pass to C<memoize> to change +the way it behaves a little. To supply options, invoke C<memoize> +like this: + + memoize(function, NORMALIZER => function, + INSTALL => newname, + SCALAR_CACHE => option, + LIST_CACHE => option + ); + +Each of these options is optional; you can include some, all, or none +of them. + +=head2 INSTALL + +If you supply a function name with C<INSTALL>, memoize will install +the new, memoized version of the function under the name you give. +For example, + + memoize('fib', INSTALL => 'fastfib') + +installs the memoized version of C<fib> as C<fastfib>; without the +C<INSTALL> option it would have replaced the old C<fib> with the +memoized version. + +To prevent C<memoize> from installing the memoized version anywhere, use +C<INSTALL =E<gt> undef>. + +=head2 NORMALIZER + +Suppose your function looks like this: + + # Typical call: f('aha!', A => 11, B => 12); + sub f { + my $a = shift; + my %hash = @_; + $hash{B} ||= 2; # B defaults to 2 + $hash{C} ||= 7; # C defaults to 7 + + # Do something with $a, %hash + } + +Now, the following calls to your function are all completely equivalent: + + f(OUCH); + f(OUCH, B => 2); + f(OUCH, C => 7); + f(OUCH, B => 2, C => 7); + f(OUCH, C => 7, B => 2); + (etc.) + +However, unless you tell C<Memoize> that these calls are equivalent, +it will not know that, and it will compute the values for these +invocations of your function separately, and store them separately. + +To prevent this, supply a C<NORMALIZER> function that turns the +program arguments into a string in a way that equivalent arguments +turn into the same string. A C<NORMALIZER> function for C<f> above +might look like this: + + sub normalize_f { + my $a = shift; + my %hash = @_; + $hash{B} ||= 2; + $hash{C} ||= 7; + + join($;, $a, map ($_ => $hash{$_}) sort keys %hash); + } + +Each of the argument lists above comes out of the C<normalize_f> +function looking exactly the same, like this: + + OUCH^\B^\2^\C^\7 + +You would tell C<Memoize> to use this normalizer this way: + + memoize('f', NORMALIZER => 'normalize_f'); + +C<memoize> knows that if the normalized version of the arguments is +the same for two argument lists, then it can safely look up the value +that it computed for one argument list and return it as the result of +calling the function with the other argument list, even if the +argument lists look different. + +The default normalizer just concatenates the arguments with C<$;> in +between. This always works correctly for functions with only one +argument, and also when the arguments never contain C<$;> (which is +normally character #28, control-\. ) However, it can confuse certain +argument lists: + + normalizer("a\034", "b") + normalizer("a", "\034b") + normalizer("a\034\034b") + +for example. + +The default normalizer also won't work when the function's arguments +are references. For exampple, consider a function C<g> which gets two +arguments: A number, and a reference to an array of numbers: + + g(13, [1,2,3,4,5,6,7]); + +The default normalizer will turn this into something like +C<"13\024ARRAY(0x436c1f)">. That would be all right, except that a +subsequent array of numbers might be stored at a different location +even though it contains the same data. If this happens, C<Memoize> +will think that the arguments are different, even though they are +equivalent. In this case, a normalizer like this is appropriate: + + sub normalize { join ' ', $_[0], @{$_[1]} } + +For the example above, this produces the key "13 1 2 3 4 5 6 7". + +Another use for normalizers is when the function depends on data other +than those in its arguments. Suppose you have a function which +returns a value which depends on the current hour of the day: + + sub on_duty { + my ($problem_type) = @_; + my $hour = (localtime)[2]; + open my $fh, "$DIR/$problem_type" or die...; + my $line; + while ($hour-- > 0) + $line = <$fh>; + } + return $line; + } + +At 10:23, this function generates the tenth line of a data file; at +3:45 PM it generates the 15th line instead. By default, C<Memoize> +will only see the $problem_type argument. To fix this, include the +current hour in the normalizer: + + sub normalize { join ' ', (localtime)[2], @_ } + +The calling context of the function (scalar or list context) is +propagated to the normalizer. This means that if the memoized +function will treat its arguments differently in list context than it +would in scalar context, you can have the normalizer function select +its behavior based on the results of C<wantarray>. Even if called in +a list context, a normalizer should still return a single string. + +=head2 C<SCALAR_CACHE>, C<LIST_CACHE> + +Normally, C<Memoize> caches your function's return values into an +ordinary Perl hash variable. However, you might like to have the +values cached on the disk, so that they persist from one run of your +program to the next, or you might like to associate some other +interesting semantics with the cached values. + +There's a slight complication under the hood of C<Memoize>: There are +actually I<two> caches, one for scalar values and one for list values. +When your function is called in scalar context, its return value is +cached in one hash, and when your function is called in list context, +its value is cached in the other hash. You can control the caching +behavior of both contexts independently with these options. + +The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of +the following four strings: + + MEMORY + FAULT + MERGE + HASH + +or else it must be a reference to a list whose first element is one of +these four strings, such as C<[HASH, arguments...]>. + +=over 4 + +=item C<MEMORY> + +C<MEMORY> means that return values from the function will be cached in +an ordinary Perl hash variable. The hash variable will not persist +after the program exits. This is the default. + +=item C<HASH> + +C<HASH> allows you to specify that a particular hash that you supply +will be used as the cache. You can tie this hash beforehand to give +it any behavior you want. + +A tied hash can have any semantics at all. It is typically tied to an +on-disk database, so that cached values are stored in the database and +retrieved from it again when needed, and the disk file typically +persists after your program has exited. See C<perltie> for more +complete details about C<tie>. + +A typical example is: + + use DB_File; + tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666; + memoize 'function', SCALAR_CACHE => [HASH => \%cache]; + +This has the effect of storing the cache in a C<DB_File> database +whose name is in C<$filename>. The cache will persist after the +program has exited. Next time the program runs, it will find the +cache already populated from the previous run of the program. Or you +can forcibly populate the cache by constructing a batch program that +runs in the background and populates the cache file. Then when you +come to run your real program the memoized function will be fast +because all its results have been precomputed. + +=item C<TIE> + +This option is B<strongly deprecated> and will be removed +in the B<next> version of C<Memoize>. Use the C<HASH> option instead. + + memoize ... [TIE, ARGS...] + +is merely a shortcut for + + tie my %cache, ARGS...; + memoize ... [HASH => \%cache]; + + +=item C<FAULT> + +C<FAULT> means that you never expect to call the function in scalar +(or list) context, and that if C<Memoize> detects such a call, it +should abort the program. The error message is one of + + `foo' function called in forbidden list context at line ... + `foo' function called in forbidden scalar context at line ... + +=item C<MERGE> + +C<MERGE> normally means the function does not distinguish between list +and sclar context, and that return values in both contexts should be +stored together. C<LIST_CACHE =E<gt> MERGE> means that list context +return values should be stored in the same hash that is used for +scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the +same, mutatis mutandis. It is an error to specify C<MERGE> for both, +but it probably does something useful. + +Consider this function: + + sub pi { 3; } + +Normally, the following code will result in two calls to C<pi>: + + $x = pi(); + ($y) = pi(); + $z = pi(); + +The first call caches the value C<3> in the scalar cache; the second +caches the list C<(3)> in the list cache. The third call doesn't call +the real C<pi> function; it gets the value from the scalar cache. + +Obviously, the second call to C<pi> is a waste of time, and storing +its return value is a waste of space. Specifying C<LIST_CACHE +=E<gt> MERGE> will make C<memoize> use the same cache for scalar and +list context return values, so that the second call uses the scalar +cache that was populated by the first call. C<pi> ends up being +cvalled only once, and both subsequent calls return C<3> from the +cache, regardless of the calling context. + +Another use for C<MERGE> is when you want both kinds of return values +stored in the same disk file; this saves you from having to deal with +two disk files instead of one. You can use a normalizer function to +keep the two sets of return values separate. For example: + + tie my %cache => 'MLDBM', 'DB_File', $filename, ...; + + memoize 'myfunc', + NORMALIZER => 'n', + SCALAR_CACHE => [HASH => \%cache], + LIST_CACHE => MERGE, + ; + + sub n { + my $context = wantarray() ? 'L' : 'S'; + # ... now compute the hash key from the arguments ... + $hashkey = "$context:$hashkey"; + } + +This normalizer function will store scalar context return values in +the disk file under keys that begin with C<S:>, and list context +return values under keys that begin with C<L:>. + +=back + +=head1 OTHER FACILITIES + +=head2 C<unmemoize> + +There's an C<unmemoize> function that you can import if you want to. +Why would you want to? Here's an example: Suppose you have your cache +tied to a DBM file, and you want to make sure that the cache is +written out to disk if someone interrupts the program. If the program +exits normally, this will happen anyway, but if someone types +control-C or something then the program will terminate immediately +without synchronizing the database. So what you can do instead is + + $SIG{INT} = sub { unmemoize 'function' }; + +Thanks to Jonathan Roy for discovering a use for C<unmemoize>. + +C<unmemoize> accepts a reference to, or the name of a previously +memoized function, and undoes whatever it did to provide the memoized +version in the first place, including making the name refer to the +unmemoized version if appropriate. It returns a reference to the +unmemoized version of the function. + +If you ask it to unmemoize a function that was never memoized, it +croaks. + +=head2 C<flush_cache> + +C<flush_cache(function)> will flush out the caches, discarding I<all> +the cached data. The argument may be a funciton name or a reference +to a function. For finer control over when data is discarded or +expired, see the documentation for C<Memoize::Expire>, included in +this package. + +Note that if the cache is a tied hash, C<flush_cache> will attempt to +invoke the C<CLEAR> method on the hash. If there is no C<CLEAR> +method, this will cause a run-time error. + +An alternative approach to cache flushing is to use the C<HASH> option +(see above) to request that C<Memoize> use a particular hash variable +as its cache. Then you can examine or modify the hash at any time in +any way you desire. + +=head1 CAVEATS + +Memoization is not a cure-all: + +=over 4 + +=item * + +Do not memoize a function whose behavior depends on program +state other than its own arguments, such as global variables, the time +of day, or file input. These functions will not produce correct +results when memoized. For a particularly easy example: + + sub f { + time; + } + +This function takes no arguments, and as far as C<Memoize> is +concerned, it always returns the same result. C<Memoize> is wrong, of +course, and the memoized version of this function will call C<time> once +to get the current time, and it will return that same time +every time you call it after that. + +=item * + +Do not memoize a function with side effects. + + sub f { + my ($a, $b) = @_; + my $s = $a + $b; + print "$a + $b = $s.\n"; + } + +This function accepts two arguments, adds them, and prints their sum. +Its return value is the numuber of characters it printed, but you +probably didn't care about that. But C<Memoize> doesn't understand +that. If you memoize this function, you will get the result you +expect the first time you ask it to print the sum of 2 and 3, but +subsequent calls will return 1 (the return value of +C<print>) without actually printing anything. + +=item * + +Do not memoize a function that returns a data structure that is +modified by its caller. + +Consider these functions: C<getusers> returns a list of users somehow, +and then C<main> throws away the first user on the list and prints the +rest: + + sub main { + my $userlist = getusers(); + shift @$userlist; + foreach $u (@$userlist) { + print "User $u\n"; + } + } + + sub getusers { + my @users; + # Do something to get a list of users; + \@users; # Return reference to list. + } + +If you memoize C<getusers> here, it will work right exactly once. The +reference to the users list will be stored in the memo table. C<main> +will discard the first element from the referenced list. The next +time you invoke C<main>, C<Memoize> will not call C<getusers>; it will +just return the same reference to the same list it got last time. But +this time the list has already had its head removed; C<main> will +erroneously remove another element from it. The list will get shorter +and shorter every time you call C<main>. + +Similarly, this: + + $u1 = getusers(); + $u2 = getusers(); + pop @$u1; + +will modify $u2 as well as $u1, because both variables are references +to the same array. Had C<getusers> not been memoized, $u1 and $u2 +would have referred to different arrays. + +=item * + +Do not memoize a very simple function. + +Recently someone mentioned to me that the Memoize module made his +program run slower instead of faster. It turned out that he was +memoizing the following function: + + sub square { + $_[0] * $_[0]; + } + +I pointed out that C<Memoize> uses a hash, and that looking up a +number in the hash is necessarily going to take a lot longer than a +single multiplication. There really is no way to speed up the +C<square> function. + +Memoization is not magical. + +=back + +=head1 PERSISTENT CACHE SUPPORT + +You can tie the cache tables to any sort of tied hash that you want +to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and +C<EXISTS>. For example, + + tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666; + memoize 'function', SCALAR_CACHE => [HASH => \%cache]; + +works just fine. For some storage methods, you need a little glue. + +C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this +package is a glue module called C<Memoize::SDBM_File> which does +provide one. Use this instead of plain C<SDBM_File> to store your +cache table on disk in an C<SDBM_File> database: + + tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666; + memoize 'function', SCALAR_CACHE => [HASH => \%cache]; + +C<NDBM_File> has the same problem and the same solution. (Use +C<Memoize::NDBM_File instead of Plain NDBM_File.>) + +C<Storable> isn't a tied hash class at all. You can use it to store a +hash to disk and retrieve it again, but you can't modify the hash while +it's on the disk. So if you want to store your cache table in a +C<Storable> database, use C<Memoize::Storable>, which puts a hashlike +front-end onto C<Storable>. The hash table is actually kept in +memory, and is loaded from your C<Storable> file at the time you +memoize the function, and stored back at the time you unmemoize the +function (or when your program exits): + + tie my %cache => 'Memoize::Storable', $filename; + memoize 'function', SCALAR_CACHE => [HASH => \%cache]; + + tie my %cache => 'Memoize::Storable', $filename, 'nstore'; + memoize 'function', SCALAR_CACHE => [HASH => \%cache]; + +Include the `nstore' option to have the C<Storable> database written +in `network order'. (See L<Storable> for more details about this.) + +=head1 EXPIRATION SUPPORT + +See Memoize::Expire, which is a plug-in module that adds expiration +functionality to Memoize. If you don't like the kinds of policies +that Memoize::Expire implements, it is easy to write your own plug-in +module to implement whatever policy you desire. Memoize comes with +several examples. An expiration manager that implements a LRU policy +is available on CPAN as Memoize::ExpireLRU. + +=head1 BUGS + +The test suite is much better, but always needs improvement. + +There used to be some problem with the way C<goto &f> works under +threaded Perl, because of the lexical scoping of C<@_>. This is a bug +in Perl, and until it is resolved, Memoize won't work with these +Perls. This is probably still the case, although I have not been able +to try it out. If you encounter this problem, you can fix it by +chopping the source code a little. Find the comment in the source +code that says C<--- THREADED PERL COMMENT---> and comment out the +active line and uncomment the commented one. Then try it again. + +Here's a bug that isn't my fault: Some versions of C<DB_File> won't +let you store data under a key of length 0. That means that if you +have a function C<f> which you memoized and the cache is in a +C<DB_File> database, then the value of C<f()> (C<f> called with no +arguments) will not be memoized. Let us all breathe deeply and repeat +this mantra: ``Gosh, Keith, that sure was a stupid thing to do.'' + +=head1 MAILING LIST + +To join a very low-traffic mailing list for announcements about +C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>. + +=head1 AUTHOR + +Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co. + +See the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/ +for news and upgrades. Near this page, at +http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about +memoization and about the internals of Memoize that appeared in The +Perl Journal, issue #13. (This article is also included in the +Memoize distribution as `article.html'.) + +To join a mailing list for announcements about C<Memoize>, send an +empty message to C<mjd-perl-memoize-request@plover.com>. This mailing +list is for announcements only and has extremely low traffic---about +four messages per year. + +=head1 THANK YOU + +Many thanks to Jonathan Roy for bug reports and suggestions, to +Michael Schwern for other bug reports and patches, to Mike Cariaso for +helping me to figure out the Right Thing to Do About Expiration, to +Joshua Gerth, Joshua Chamas, Jonathan Roy, Mark D. Anderson, and +Andrew Johnson for more suggestions about expiration, to Brent Powers +for the Memoize::ExpireLRU module, to Ariel Scolnicov for delightful +messages about the Fibonacci function, to Dion Almaer for +thought-provoking suggestions about the default normalizer, to Walt +Mankowski and Kurt Starsinic for much help investigating problems +under threaded Perl, to Alex Dudkevich for reporting the bug in +prototyped functions and for checking my patch, to Tony Bass for many +helpful suggestions, to Philippe Verdret for enlightening discussion +of Hook::PrePostCall, to Nat Torkington for advice I ignored, to Chris +Nandor for portability advice, to Randal Schwartz for suggesting the +'C<flush_cache> function, and to Jenda Krynicky for being a light in +the world. + +=cut diff --git a/lib/Memoize/AnyDBM_File.pm b/lib/Memoize/AnyDBM_File.pm new file mode 100644 index 0000000000..eb2e659c0d --- /dev/null +++ b/lib/Memoize/AnyDBM_File.pm @@ -0,0 +1,18 @@ +package Memoize::AnyDBM_File; + +use vars qw(@ISA); +@ISA = qw(DB_File GDBM_File Memoize::NDBM_File Memoize::SDBM_File ODBM_File) unless @ISA; + +my $verbose = 1; + +my $mod; +for $mod (@ISA) { +# (my $truemod = $mod) =~ s/^Memoize:://; + if (eval "require $mod") { + print STDERR "AnyDBM_File => Selected $mod.\n" if $Verbose; + @ISA = ($mod); # if we leave @ISA alone, warnings abound + return 1; + } +} + +die "No DBM package was successfully found or installed"; diff --git a/lib/Memoize/Expire.pm b/lib/Memoize/Expire.pm new file mode 100644 index 0000000000..0a631a4735 --- /dev/null +++ b/lib/Memoize/Expire.pm @@ -0,0 +1,339 @@ + +package Memoize::Expire; +# require 5.00556; +use Carp; +$DEBUG = 0; +$VERSION = '0.51'; + +# This package will implement expiration by prepending a fixed-length header +# to the font of the cached data. The format of the header will be: +# (4-byte number of last-access-time) (For LRU when I implement it) +# (4-byte expiration time: unsigned seconds-since-unix-epoch) +# (2-byte number-of-uses-before-expire) + +sub _header_fmt () { "N N n" } +sub _header_size () { length(_header_fmt) } + +# Usage: memoize func +# TIE => [Memoize::Expire, LIFETIME => sec, NUM_USES => n, +# TIE => [...] ] + +sub TIEHASH { + my ($package, %args) = @_; + my %cache; + if ($args{TIE}) { + my ($module, @opts) = @{$args{TIE}}; + my $modulefile = $module . '.pm'; + $modulefile =~ s{::}{/}g; + eval { require $modulefile }; + if ($@) { + croak "Memoize::Expire: Couldn't load hash tie module `$module': $@; aborting"; + } + my $rc = (tie %cache => $module, @opts); + unless ($rc) { + croak "Memoize::Expire: Couldn't tie hash to `$module': $@; aborting"; + } + } + $args{LIFETIME} ||= 0; + $args{NUM_USES} ||= 0; + $args{C} = \%cache; + bless \%args => $package; +} + +sub STORE { + $DEBUG and print STDERR " >> Store $_[1] $_[2]\n"; + my ($self, $key, $value) = @_; + my $expire_time = $self->{LIFETIME} > 0 ? $self->{LIFETIME} + time : 0; + # The call that results in a value to store into the cache is the + # first of the NUM_USES allowed calls. + my $header = _make_header(time, $expire_time, $self->{NUM_USES}-1); + $self->{C}{$key} = $header . $value; + $value; +} + +sub FETCH { + $DEBUG and print STDERR " >> Fetch cached value for $_[1]\n"; + my ($data, $last_access, $expire_time, $num_uses_left) = _get_item($_[0]{C}{$_[1]}); + $DEBUG and print STDERR " >> (ttl: ", ($expire_time-time), ", nuses: $num_uses_left)\n"; + $num_uses_left--; + $last_access = time; + _set_header(@_, $data, $last_access, $expire_time, $num_uses_left); + $data; +} + +sub EXISTS { + $DEBUG and print STDERR " >> Exists $_[1]\n"; + unless (exists $_[0]{C}{$_[1]}) { + $DEBUG and print STDERR " Not in underlying hash at all.\n"; + return 0; + } + my $item = $_[0]{C}{$_[1]}; + my ($last_access, $expire_time, $num_uses_left) = _get_header($item); + my $ttl = $expire_time - time; + if ($DEBUG) { + $_[0]{LIFETIME} and print STDERR " Time to live for this item: $ttl\n"; + $_[0]{NUM_USES} and print STDERR " Uses remaining: $num_uses_left\n"; + } + if ( (! $_[0]{LIFETIME} || $expire_time > time) + && (! $_[0]{NUM_USES} || $num_uses_left > 0 )) { + $DEBUG and print STDERR " (Still good)\n"; + return 1; + } else { + $DEBUG and print STDERR " (Expired)\n"; + return 0; + } +} + +# Arguments: last access time, expire time, number of uses remaining +sub _make_header { + pack "N N n", @_; +} + +sub _strip_header { + substr($_[0], 10); +} + +# Arguments: last access time, expire time, number of uses remaining +sub _set_header { + my ($self, $key, $data, @header) = @_; + $self->{C}{$key} = _make_header(@header) . $data; +} + +sub _get_item { + my $data = substr($_[0], 10); + my @header = unpack "N N n", substr($_[0], 0, 10); +# print STDERR " >> _get_item: $data => $data @header\n"; + ($data, @header); +} + +# Return last access time, expire time, number of uses remaining +sub _get_header { + unpack "N N n", substr($_[0], 0, 10); +} + +1; + +# Below is the stub of documentation for your module. You better edit it! + +=head1 NAME + +Memoize::Expire - Plug-in module for automatic expiration of memoized values + +=head1 SYNOPSIS + + use Memoize; + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::Expire, + LIFETIME => $lifetime, # In seconds + NUM_USES => $n_uses, + TIE => [Module, args...], + ], + +=head1 DESCRIPTION + +Memoize::Expire is a plug-in module for Memoize. It allows the cached +values for memoized functions to expire automatically. This manual +assumes you are already familiar with the Memoize module. If not, you +should study that manual carefully first, paying particular attention +to the TIE feature. + +Memoize::Expire is a layer of software that you can insert in between +Memoize itself and whatever underlying package implements the cache. +(By default, plain hash variables implement the cache.) The layer +expires cached values whenever they get too old, have been used too +often, or both. + +To specify a real-time timeout, supply the LIFETIME option with a +numeric value. Cached data will expire after this many seconds, and +will be looked up afresh when it expires. When a data item is looked +up afresh, its lifetime is reset. + +If you specify NUM_USES with an argument of I<n>, then each cached +data item will be discarded and looked up afresh after the I<n>th time +you access it. When a data item is looked up afresh, its number of +uses is reset. + +If you specify both arguments, data will be discarded from the cache +when either expiration condition holds. + +If you want the cache to persist between invocations of your program, +supply a TIE option to specify the package name and arguments for a +the tied hash that will implement the persistence. For example: + + use Memoize; + use DB_File; + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::Expire, + LIFETIME => $lifetime, # In seconds + NUM_USES => $n_uses, + TIE => [DB_File, $filename, O_CREAT|O_RDWR, 0666], + ], ...; + + + +=head1 INTERFACE + +There is nothing special about Memoize::Expire. It is just an +example. If you don't like the policy that it implements, you are +free to write your own expiration policy module that implements +whatever policy you desire. Here is how to do that. Let us suppose +that your module will be named MyExpirePolicy. + +Short summary: You need to create a package that defines four methods: + +=over 4 + +=item +TIEHASH + +Construct and return cache object. + +=item +EXISTS + +Given a function argument, is the corresponding function value in the +cache, and if so, is it fresh enough to use? + +=item +FETCH + +Given a function argument, look up the corresponding function value in +the cache and return it. + +=item +STORE + +Given a function argument and the corresponding function value, store +them into the cache. + +=back + +The user who wants the memoization cache to be expired according to +your policy will say so by writing + + memoize 'function', + SCALAR_CACHE => [TIE, MyExpirePolicy, args...]; + +This will invoke MyExpirePolicy->TIEHASH(args). +MyExpirePolicy::TIEHASH should do whatever is appropriate to set up +the cache, and it should return the cache object to the caller. + +For example, MyExpirePolicy::TIEHASH might create an object that +contains a regular Perl hash (which it will to store the cached +values) and some extra information about the arguments and how old the +data is and things like that. Let us call this object `C'. + +When Memoize needs to check to see if an entry is in the cache +already, it will invoke C->EXISTS(key). C<key> is the normalized +function argument. MyExpirePolicy::EXISTS should return 0 if the key +is not in the cache, or if it has expired, and 1 if an unexpired value +is in the cache. It should I<not> return C<undef>, because there is a +bug in some versions of Perl that will cause a spurious FETCH if the +EXISTS method returns C<undef>. + +If your EXISTS function returns true, Memoize will try to fetch the +cached value by invoking C->FETCH(key). MyExpirePolicy::FETCH should +return the cached value. Otherwise, Memoize will call the memoized +function to compute the appropriate value, and will store it into the +cache by calling C->STORE(key, value). + +Here is a very brief example of a policy module that expires each +cache item after ten seconds. + + package Memoize::TenSecondExpire; + + sub TIEHASH { + my ($package) = @_; + my %cache; + bless \%cache => $package; + } + + sub EXISTS { + my ($cache, $key) = @_; + if (exists $cache->{$key} && + $cache->{$key}{EXPIRE_TIME} > time) { + return 1 + } else { + return 0; # Do NOT return `undef' here. + } + } + + sub FETCH { + my ($cache, $key) = @_; + return $cache->{$key}{VALUE}; + } + + sub STORE { + my ($cache, $key, $newvalue) = @_; + $cache->{$key}{VALUE} = $newvalue; + $cache->{$key}{EXPIRE_TIME} = time + 10; + } + +To use this expiration policy, the user would say + + use Memoize; + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::TenSecondExpire]; + +Memoize would then call C<function> whenever a cached value was +entirely absent or was older than ten seconds. + +It's nice if you allow a C<TIE> argument to C<TIEHASH> that ties the +underlying cache so that the user can specify that the cache is +persistent or that it has some other interesting semantics. The +sample C<Memoize::Expire> module demonstrates how to do this. It +implements a policy that expires cache items when they get too old or +when they have been accessed too many times. + +Another sample module, C<Memoize::Saves>, is included with this +package. It implements a policy that allows you to specify that +certain function values whould always be looked up afresh. See the +documentation for details. + +=head1 ALTERNATIVES + +Joshua Chamas's Tie::Cache module may be useful as an expiration +manager. (If you try this, let me know how it works out.) + +If you develop any useful expiration managers that you think should be +distributed with Memoize, please let me know. + +=head1 CAVEATS + +This module is experimental, and may contain bugs. Please report bugs +to the address below. + +Number-of-uses is stored as a 16-bit unsigned integer, so can't exceed +65535. + +Because of clock granularity, expiration times may occur up to one +second sooner than you expect. For example, suppose you store a value +with a lifetime of ten seconds, and you store it at 12:00:00.998 on a +certain day. Memoize will look at the clock and see 12:00:00. Then +9.01 seconds later, at 12:00:10.008 you try to read it back. Memoize +will look at the clock and see 12:00:10 and conclude that the value +has expired. Solution: Build an expiration policy module that uses +Time::HiRes to examine a clock with better granularity. Contributions +are welcome. Send them to: + +=head1 AUTHOR + +Mark-Jason Dominus (mjd-perl-memoize+@plover.com) + +Mike Cariaso provided valuable insight into the best way to solve this +problem. + +=head1 SEE ALSO + +perl(1) + +The Memoize man page. + +http://www.plover.com/~mjd/perl/Memoize/ (for news and updates) + +I maintain a mailing list on which I occasionally announce new +versions of Memoize. The list is for announcements only, not +discussion. To join, send an empty message to +mjd-perl-memoize-request@Plover.com. + +=cut diff --git a/lib/Memoize/ExpireFile.pm b/lib/Memoize/ExpireFile.pm new file mode 100644 index 0000000000..958b807cfa --- /dev/null +++ b/lib/Memoize/ExpireFile.pm @@ -0,0 +1,48 @@ + +package Memoize::ExpireFile; +use Carp; + +sub TIEHASH { + my ($package, %args) = @_; + my %cache; + if ($args{TIE}) { + my ($module, @opts) = @{$args{TIE}}; + my $modulefile = $module . '.pm'; + $modulefile =~ s{::}{/}g; + eval { require $modulefile }; + if ($@) { + croak "Memoize::ExpireFile: Couldn't load hash tie module `$module': $@; aborting"; + } + my $rc = (tie %cache => $module, @opts); + unless ($rc) { + croak "Memoize::ExpireFile: Couldn't tie hash to `$module': $@; aborting"; + } + } + bless {ARGS => \%args, C => \%cache} => $package; +} + + +sub STORE { + my ($self, $key, $data) = @_; + my $cache = $self->{C}; + my $cur_date = pack("N", (stat($key))[9]); + $cache->{"C$key"} = $data; + $cache->{"T$key"} = $cur_date; +} + +sub FETCH { + my ($self, $key) = @_; + $self->{C}{"C$key"}; +} + +sub EXISTS { + my ($self, $key) = @_; + my $old_date = $self->{C}{"T$key"} || "0"; + my $cur_date = pack("N", (stat($key))[9]); + if ($self->{ARGS}{CHECK_DATE} && $old_date gt $cur_date) { + return $self->{ARGS}{CHECK_DATE}->($key, $old_date, $cur_date); + } + return $old_date ge $cur_date; +} + +1; diff --git a/lib/Memoize/ExpireTest.pm b/lib/Memoize/ExpireTest.pm new file mode 100644 index 0000000000..1c889ed934 --- /dev/null +++ b/lib/Memoize/ExpireTest.pm @@ -0,0 +1,42 @@ + +# This is just for testing expiration semantics. +# It's not actually a very good example of how to write +# an expiration module. +# +# If you are looking for an example, I recommend that you look at the +# simple example in the Memoize::Expire documentation, or at the +# code for Memoize::Expire itself. +# +# If you have questions, I will be happy to answer them if you +# send them to mjd-perl/memoize+@plover.com. + +package Memoize::ExpireTest; + +my %cache; + +sub TIEHASH { + my ($pack) = @_; + bless \%cache => $pack; +} + +sub EXISTS { + my ($cache, $key) = @_; + exists $cache->{$key} ? 1 : 0; +} + +sub FETCH { + my ($cache, $key) = @_; + $cache->{$key}; +} + +sub STORE { + my ($cache, $key, $val) = @_; + $cache->{$key} = $val; +} + +sub expire { + my ($key) = @_; + delete $cache{$key}; +} + +1; diff --git a/lib/Memoize/NDBM_File.pm b/lib/Memoize/NDBM_File.pm new file mode 100644 index 0000000000..ee58cc4d7c --- /dev/null +++ b/lib/Memoize/NDBM_File.pm @@ -0,0 +1,63 @@ +package Memoize::NDBM_File; +use NDBM_File; +@ISA = qw(NDBM_File); + +$Verbose = 0; + +sub AUTOLOAD { + warn "Nonexistent function $AUTOLOAD invoked in Memoize::NDBM_File\n"; +} + +sub import { + warn "Importing Memoize::NDBM_File\n" if $Verbose; +} + + +my %keylist; + +# This is so ridiculous... +sub _backhash { + my $self = shift; + my %fakehash; + my $k; + for ($k = $self->FIRSTKEY(); defined $k; $k = $self->NEXTKEY($k)) { + $fakehash{$k} = undef; + } + $keylist{$self} = \%fakehash; +} + +sub EXISTS { + warn "Memoize::NDBM_File EXISTS (@_)\n" if $Verbose; + my $self = shift; + _backhash($self) unless exists $keylist{$self}; + my $r = exists $keylist{$self}{$_[0]}; + warn "Memoize::NDBM_File EXISTS (@_) ==> $r\n" if $Verbose; + $r; +} + +sub DEFINED { + warn "Memoize::NDBM_File DEFINED (@_)\n" if $Verbose; + my $self = shift; + _backhash($self) unless exists $keylist{$self}; + defined $keylist{$self}{$_[0]}; +} + +sub DESTROY { + warn "Memoize::NDBM_File DESTROY (@_)\n" if $Verbose; + my $self = shift; + delete $keylist{$self}; # So much for reference counting... + $self->SUPER::DESTROY(@_); +} + +# Maybe establish the keylist at TIEHASH time instead? + +sub STORE { + warn "Memoize::NDBM_File STORE (@_)\n" if $VERBOSE; + my $self = shift; + $keylist{$self}{$_[0]} = undef; + $self->SUPER::STORE(@_); +} + +# Inherit FETCH and TIEHASH + +1; diff --git a/lib/Memoize/README b/lib/Memoize/README new file mode 100644 index 0000000000..60f9b8387b --- /dev/null +++ b/lib/Memoize/README @@ -0,0 +1,714 @@ + +Name: Memoize +What: Transparently speed up functions by caching return values. +Version: 0.51 +Author: Mark-Jason Dominus (mjd-perl-memoize+@plover.com) + +################################################################ + +How to build me: + + perl Makefile.PL + make + make test + +There's a very small chance that the tests in speed.t and +expire_module_t.t might fail because of clock skew or bizarre system +load conditions. If the tests there fail, rerun them and see if the +problem persists. + +If the tests work, + + make install + +If not, please send me a report that mentions which tests failed. +The address is: mjd-perl-memoize+@plover.com. + +################################################################ +What's new since 0.49: + +Just a maintenance release. I made the tests a little more robust, +and I included the Memoization article that I forgot to put into 0.48. + +################################################################ +What's new since 0.48: + +You can now expire data from the memoization cache according to any +expiration policy you desire. A sample policy is provided in the +Memoize::Expire module. It supports expiration of items that have +been in the cache a certain number of seconds and items that have been +accessed a certain number of times. When you call a memoized +function, and Memoize discovers that a cache item has expired, it +calls the real function and stores the result in the cache, just as if +the data had not been in the cache in the first place. + +Many people asked for a cache expiration feature, and some people even +sent patches. Thanks for the patches! But I did not accept them, +because they all added the expiration stuff into the module, and I was +sure that this was a bad way to do it. Everyone had a different idea +of what useful expiration behavior was, so I foresaw an endless series +of creeeping features and an expiration mechansim that got more and +more and more complicated and slower and slower and slower. + +The new expiration policy mechanism makes use of the TIE feature. You +write a cache policy module ( which might be very simple) and use the +TIE feature to insert it between memoize and the real cache. The +Memoize::Expire module. included in this package, is a useful example +of this that might satisfy many people. The documentation for that +module includes an even simpler module for those who would like to +implement their own expiration policies. + +Big win: If you don't use the expiration feature, you don't pay for +it. Memoize 0.49 with expiration turned off runs *exactly* as fast as +Memoize 0.48 did. Not one line of code has been changed. + +Moral of the story: Sometimes, there is a Right Way to Do Things that +really is better than the obvious way. It might not be obvious at +first, and sometimes you have to make people wait for features so that +the Right Way to Do Things can make itself known. + +Many thanks to Mike Cariaso for helping me figure out The Right Way to +Do Things. + +Also: If you try to use ODBM_File, NDBM_File, SDBM_File, GDBM_File, or +DB_File for the LIST_CACHE, you get an error right away, because those +kinds of files will only store strings. Thanks to Jonathan Roy for +suggesting this. If you want to store list values in a persistent +cache, try Memoize::Storable. + +################################################################ + +What's new since 0.46: + +Caching of function return values into NDBM files is now supported. +You can cache function return values into Memoize::AnyDBM files, which +is a pseudo-module that selects the `best' available DBM +implementation. + +Bug fix: Prototyped functions are now memoized correctly; memoizing +used to remove the prototype and issue a warning. Also new tests for +this feature. (Thanks Alex Dudkevich) + +New test suites for SDBM and NDBM caching and prototyped functions. +Various small fixes in the test suite. +Various documentation enhancements and fixes. + +################################################################ + +What's new since 0.45: + +Now has an interface to `Storable'. This wasn't formerly possible, +because the base package can only store caches via modules that +present a tied hash interface, and `Storable' doesn't. Solution: +Memoize::Storable is a tied hash interface to `Storable'. + +################################################################ + +What's new since 0.06: + +Storage of cached function return values in a static file is now +tentatively supported. `memoize' now accepts new options SCALAR_CACHE +and LIST_CACHE to specify the destination and protocol for saving +cached values to disk. + +Consider these features alpha, and please report bugs to +mjd-perl-memoize@plover.com. The beta version is awaiting a more +complete test suite. + +Much new documentation to support all this. + +################################################################ + +What's new since 0.05: + +Calling syntax is now + + memoize(function, OPTION1 => VALUE1, ...) + +instead of + + memoize(function, { OPTION1 => VALUE1, ... }) + + +Functions that return lists can now be memoized. + +New tests for list-returning functions and their normalizers. + +Various documentation changes. + +Return value from `unmemoize' is now the resulting unmemoized +function, instead of the constant `1'. It was already docmuented to +do so. + +################################################################ + + +=head1 NAME + +Memoize - Make your functions faster by trading space for time + +=head1 SYNOPSIS + + use Memoize; + memoize('slow_function'); + slow_function(arguments); # Is faster than it was before + + +This is normally all you need to know. However, many options are available: + + memoize(function, options...); + +Options include: + + NORMALIZER => function + INSTALL => new_name + + SCALAR_CACHE => 'MEMORY' + SCALAR_CACHE => ['TIE', Module, arguments...] + SCALAR_CACHE => 'FAULT' + SCALAR_CACHE => 'MERGE' + + LIST_CACHE => 'MEMORY' + LIST_CACHE => ['TIE', Module, arguments...] + LIST_CACHE => 'FAULT' + LIST_CACHE => 'MERGE' + + +=head1 DESCRIPTION + +`Memoizing' a function makes it faster by trading space for time. It +does this by caching the return values of the function in a table. +If you call the function again with the same arguments, C<memoize> +jmups in and gives you the value out of the table, instead of letting +the function compute the value all over again. + +Here is an extreme example. Consider the Fibonacci sequence, defined +by the following function: + + # Compute Fibonacci numbers + sub fib { + my $n = shift; + return $n if $n < 2; + fib($n-1) + fib($n-2); + } + +This function is very slow. Why? To compute fib(14), it first wants +to compute fib(13) and fib(12), and add the results. But to compute +fib(13), it first has to compute fib(12) and fib(11), and then it +comes back and computes fib(12) all over again even though the answer +is the same. And both of the times that it wants to compute fib(12), +it has to compute fib(11) from scratch, and then it has to do it +again each time it wants to compute fib(13). This function does so +much recomputing of old results that it takes a really long time to +run---fib(14) makes 1,200 extra recursive calls to itself, to compute +and recompute things that it already computed. + +This function is a good candidate for memoization. If you memoize the +`fib' function above, it will compute fib(14) exactly once, the first +time it needs to, and then save the result in a table. Then if you +ask for fib(14) again, it gives you the result out of the table. +While computing fib(14), instead of computing fib(12) twice, it does +it once; the second time it needs the value it gets it from the table. +It doesn't compute fib(11) four times; it computes it once, getting it +from the table the next three times. Instead of making 1,200 +recursive calls to `fib', it makes 15. This makes the function about +150 times faster. + +You could do the memoization yourself, by rewriting the function, like +this: + + # Compute Fibonacci numbers, memoized version + { my @fib; + sub fib { + my $n = shift; + return $fib[$n] if defined $fib[$n]; + return $fib[$n] = $n if $n < 2; + $fib[$n] = fib($n-1) + fib($n-2); + } + } + +Or you could use this module, like this: + + use Memoize; + memoize('fib'); + + # Rest of the fib function just like the original version. + +This makes it easy to turn memoizing on and off. + +Here's an even simpler example: I wrote a simple ray tracer; the +program would look in a certain direction, figure out what it was +looking at, and then convert the `color' value (typically a string +like `red') of that object to a red, green, and blue pixel value, like +this: + + for ($direction = 0; $direction < 300; $direction++) { + # Figure out which object is in direction $direction + $color = $object->{color}; + ($r, $g, $b) = @{&ColorToRGB($color)}; + ... + } + +Since there are relatively few objects in a picture, there are only a +few colors, which get looked up over and over again. Memoizing +C<ColorToRGB> speeded up the program by several percent. + +=head1 DETAILS + +This module exports exactly one function, C<memoize>. The rest of the +functions in this package are None of Your Business. + +You should say + + memoize(function) + +where C<function> is the name of the function you want to memoize, or +a reference to it. C<memoize> returns a reference to the new, +memoized version of the function, or C<undef> on a non-fatal error. +At present, there are no non-fatal errors, but there might be some in +the future. + +If C<function> was the name of a function, then C<memoize> hides the +old version and installs the new memoized version under the old name, +so that C<&function(...)> actually invokes the memoized version. + +=head1 OPTIONS + +There are some optional options you can pass to C<memoize> to change +the way it behaves a little. To supply options, invoke C<memoize> +like this: + + memoize(function, NORMALIZER => function, + INSTALL => newname, + SCALAR_CACHE => option, + LIST_CACHE => option + ); + +Each of these options is optional; you can include some, all, or none +of them. + +=head2 INSTALL + +If you supply a function name with C<INSTALL>, memoize will install +the new, memoized version of the function under the name you give. +For example, + + memoize('fib', INSTALL => 'fastfib') + +installs the memoized version of C<fib> as C<fastfib>; without the +C<INSTALL> option it would have replaced the old C<fib> with the +memoized version. + +To prevent C<memoize> from installing the memoized version anywhere, use +C<INSTALL =E<gt> undef>. + +=head2 NORMALIZER + +Suppose your function looks like this: + + # Typical call: f('aha!', A => 11, B => 12); + sub f { + my $a = shift; + my %hash = @_; + $hash{B} ||= 2; # B defaults to 2 + $hash{C} ||= 7; # C defaults to 7 + + # Do something with $a, %hash + } + +Now, the following calls to your function are all completely equivalent: + + f(OUCH); + f(OUCH, B => 2); + f(OUCH, C => 7); + f(OUCH, B => 2, C => 7); + f(OUCH, C => 7, B => 2); + (etc.) + +However, unless you tell C<Memoize> that these calls are equivalent, +it will not know that, and it will compute the values for these +invocations of your function separately, and store them separately. + +To prevent this, supply a C<NORMALIZER> function that turns the +program arguments into a string in a way that equivalent arguments +turn into the same string. A C<NORMALIZER> function for C<f> above +might look like this: + + sub normalize_f { + my $a = shift; + my %hash = @_; + $hash{B} ||= 2; + $hash{C} ||= 7; + + join($;, $a, map ($_ => $hash{$_}) sort keys %hash); + } + +Each of the argument lists above comes out of the C<normalize_f> +function looking exactly the same, like this: + + OUCH^\B^\2^\C^\7 + +You would tell C<Memoize> to use this normalizer this way: + + memoize('f', NORMALIZER => 'normalize_f'); + +C<memoize> knows that if the normalized version of the arguments is +the same for two argument lists, then it can safely look up the value +that it computed for one argument list and return it as the result of +calling the function with the other argument list, even if the +argument lists look different. + +The default normalizer just concatenates the arguments with C<$;> in +between. This always works correctly for functions with only one +argument, and also when the arguments never contain C<$;> (which is +normally character #28, control-\. ) However, it can confuse certain +argument lists: + + normalizer("a\034", "b") + normalizer("a", "\034b") + normalizer("a\034\034b") + +for example. + +The calling context of the function (scalar or list context) is +propagated to the normalizer. This means that if the memoized +function will treat its arguments differently in list context than it +would in scalar context, you can have the normalizer function select +its behavior based on the results of C<wantarray>. Even if called in +a list context, a normalizer should still return a single string. + +=head2 C<SCALAR_CACHE>, C<LIST_CACHE> + +Normally, C<Memoize> caches your function's return values into an +ordinary Perl hash variable. However, you might like to have the +values cached on the disk, so that they persist from one run of your +program to the next, or you might like to associate some other +interesting semantics with the cached values. + +There's a slight complication under the hood of C<Memoize>: There are +actually I<two> caches, one for scalar values and one for list values. +When your function is called in scalar context, its return value is +cached in one hash, and when your function is called in list context, +its value is cached in the other hash. You can control the caching +behavior of both contexts independently with these options. + +The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of +the following four strings: + + MEMORY + TIE + FAULT + MERGE + +or else it must be a reference to a list whose first element is one of +these four strings, such as C<[TIE, arguments...]>. + +=over 4 + +=item C<MEMORY> + +C<MEMORY> means that return values from the function will be cached in +an ordinary Perl hash variable. The hash variable will not persist +after the program exits. This is the default. + +=item C<TIE> + +C<TIE> means that the function's return values will be cached in a +tied hash. A tied hash can have any semantics at all. It is +typically tied to an on-disk database, so that cached values are +stored in the database and retrieved from it again when needed, and +the disk file typically persists after your pogram has exited. + +If C<TIE> is specified as the first element of a list, the remaining +list elements are taken as arguments to the C<tie> call that sets up +the tied hash. For example, + + SCALAR_CACHE => [TIE, DB_File, $filename, O_RDWR | O_CREAT, 0666] + +says to tie the hash into the C<DB_File> package, and to pass the +C<$filename>, C<O_RDWR | O_CREAT>, and C<0666> arguments to the C<tie> +call. This has the effect of storing the cache in a C<DB_File> +database whose name is in C<$filename>. + +Other typical uses of C<TIE>: + + LIST_CACHE => [TIE, GDBM_File, $filename, O_RDWR | O_CREAT, 0666] + SCALAR_CACHE => [TIE, MLDBM, DB_File, $filename, O_RDWR|O_CREAT, 0666] + LIST_CACHE => [TIE, My_Package, $tablename, $key_field, $val_field] + +This last might tie the cache hash to a package that you wrote +yourself that stores the cache in a SQL-accessible database. +A useful use of this feature: You can construct a batch program that +runs in the background and populates the memo table, and then when you +come to run your real program the memoized function will be +screamingly fast because all its results have been precomputed. + +=item C<FAULT> + +C<FAULT> means that you never expect to call the function in scalar +(or list) context, and that if C<Memoize> detects such a call, it +should abort the program. The error message is one of + + `foo' function called in forbidden list context at line ... + `foo' function called in forbidden scalar context at line ... + +=item C<MERGE> + +C<MERGE> normally means the function does not distinguish between list +and sclar context, and that return values in both contexts should be +stored together. C<LIST_CACHE =E<gt> MERGE> means that list context +return values should be stored in the same hash that is used for +scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the +same, mutatis mutandis. It is an error to specify C<MERGE> for both, +but it probably does something useful. + +Consider this function: + + sub pi { 3; } + +Normally, the following code will result in two calls to C<pi>: + + $x = pi(); + ($y) = pi(); + $z = pi(); + +The first call caches the value C<3> in the scalar cache; the second +caches the list C<(3)> in the list cache. The third call doesn't call +the real C<pi> function; it gets the value from the scalar cache. + +Obviously, the second call to C<pi> is a waste of time, and storing +its return value is a waste of space. Specifying C<LIST_CACHE +=E<gt> MERGE> will make C<memoize> use the same cache for scalar and +list context return values, so that the second call uses the scalar +cache that was populated by the first call. C<pi> ends up being +cvalled only once, and both subsequent calls return C<3> from the +cache, regardless of the calling context. + +Another use for C<MERGE> is when you want both kinds of return values +stored in the same disk file; this saves you from having to deal with +two disk files instead of one. You can use a normalizer function to +keep the two sets of return values separate. For example: + + memoize 'myfunc', + NORMALIZER => 'n', + SCALAR_CACHE => [TIE, MLDBM, DB_File, $filename, ...], + LIST_CACHE => MERGE, + ; + + sub n { + my $context = wantarray() ? 'L' : 'S'; + # ... now compute the hash key from the arguments ... + $hashkey = "$context:$hashkey"; + } + +This normalizer function will store scalar context return values in +the disk file under keys that begin with C<S:>, and list context +return values under keys that begin with C<L:>. + +=back + +=head1 OTHER FUNCTION + +There's an C<unmemoize> function that you can import if you want to. +Why would you want to? Here's an example: Suppose you have your cache +tied to a DBM file, and you want to make sure that the cache is +written out to disk if someone interrupts the program. If the program +exits normally, this will happen anyway, but if someone types +control-C or something then the program will terminate immediately +without syncronizing the database. So what you can do instead is + + $SIG{INT} = sub { unmemoize 'function' }; + + +Thanks to Jonathan Roy for discovering a use for C<unmemoize>. + +C<unmemoize> accepts a reference to, or the name of a previously +memoized function, and undoes whatever it did to provide the memoized +version in the first place, including making the name refer to the +unmemoized version if appropriate. It returns a reference to the +unmemoized version of the function. + +If you ask it to unmemoize a function that was never memoized, it +croaks. + +=head1 CAVEATS + +Memoization is not a cure-all: + +=over 4 + +=item * + +Do not memoize a function whose behavior depends on program +state other than its own arguments, such as global variables, the time +of day, or file input. These functions will not produce correct +results when memoized. For a particularly easy example: + + sub f { + time; + } + +This function takes no arguments, and as far as C<Memoize> is +concerned, it always returns the same result. C<Memoize> is wrong, of +course, and the memoized version of this function will call C<time> once +to get the current time, and it will return that same time +every time you call it after that. + +=item * + +Do not memoize a function with side effects. + + sub f { + my ($a, $b) = @_; + my $s = $a + $b; + print "$a + $b = $s.\n"; + } + +This function accepts two arguments, adds them, and prints their sum. +Its return value is the numuber of characters it printed, but you +probably didn't care about that. But C<Memoize> doesn't understand +that. If you memoize this function, you will get the result you +expect the first time you ask it to print the sum of 2 and 3, but +subsequent calls will return the number 11 (the return value of +C<print>) without actually printing anything. + +=item * + +Do not memoize a function that returns a data structure that is +modified by its caller. + +Consider these functions: C<getusers> returns a list of users somehow, +and then C<main> throws away the first user on the list and prints the +rest: + + sub main { + my $userlist = getusers(); + shift @$userlist; + foreach $u (@$userlist) { + print "User $u\n"; + } + } + + sub getusers { + my @users; + # Do something to get a list of users; + \@users; # Return reference to list. + } + +If you memoize C<getusers> here, it will work right exactly once. The +reference to the users list will be stored in the memo table. C<main> +will discard the first element from the referenced list. The next +time you invoke C<main>, C<Memoize> will not call C<getusers>; it will +just return the same reference to the same list it got last time. But +this time the list has already had its head removed; C<main> will +erroneously remove another element from it. The list will get shorter +and shorter every time you call C<main>. + + +=back + +=head1 PERSISTENT CACHE SUPPORT + +You can tie the cache tables to any sort of tied hash that you want +to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and +C<EXISTS>. For example, + + memoize 'function', SCALAR_CACHE => + [TIE, GDBM_File, $filename, O_RDWR|O_CREAT, 0666]; + +works just fine. For some storage methods, you need a little glue. + +C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this +package is a glue module called C<Memoize::SDBM_File> which does +provide one. Use this instead of plain C<SDBM_File> to store your +cache table on disk in an C<SDBM_File> database: + + memoize 'function', + SCALAR_CACHE => + [TIE, Memoize::SDBM_File, $filename, O_RDWR|O_CREAT, 0666]; + +C<NDBM_File> has the same problem and the same solution. + +C<Storable> isn't a tied hash class at all. You can use it to store a +hash to disk and retrieve it again, but you can't modify the hash while +it's on the disk. So if you want to store your cache table in a +C<Storable> database, use C<Memoize::Storable>, which puts a hashlike +front-end onto C<Storable>. The hash table is actually kept in +memory, and is loaded from your C<Storable> file at the time you +memoize the function, and stored back at the time you unmemoize the +function (or when your program exits): + + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::Storable, $filename]; + + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::Storable, $filename, 'nstore']; + +Include the `nstore' option to have the C<Storable> database written +in `network order'. (See L<Storable> for more details about this.) + +=head1 EXPIRATION SUPPORT + +See Memoize::Expire, which is a plug-in module that adds expiration +functionality to Memoize. If you don't like the kinds of policies +that Memoize::Expire implements, it is easy to write your own plug-in +module to implement whatever policy you desire. + +=head1 MY BUGS + +Needs a better test suite, especially for the tied and expiration stuff. + +Also, there is some problem with the way C<goto &f> works under +threaded Perl, because of the lexical scoping of C<@_>. This is a bug +in Perl, and until it is resolved, Memoize won't work with these +Perls. To fix it, you need to chop the source code a little. Find +the comment in the source code that says C<--- THREADED PERL +COMMENT---> and comment out the active line and uncomment the +commented one. Then try it again. + +I wish I could investigate this threaded Perl problem. If someone +could lend me an account on a machine with threaded Perl for a few +hours, it would be very helpful. + +That is why the version number is 0.49 instead of 1.00. + +=head1 MAILING LIST + +To join a very low-traffic mailing list for announcements about +C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>. + +=head1 AUTHOR + +Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co. + +See the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/ +for news and upgrades. Near this page, at +http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about +memoization and about the internals of Memoize that appeared in The +Perl Journal, issue #13. (This article is also included in the +Memoize distribution as `article.html'.) + +To join a mailing list for announcements about C<Memoize>, send an +empty message to C<mjd-perl-memoize-request@plover.com>. This mailing +list is for announcements only and has extremely low traffic---about +four messages per year. + +=head1 THANK YOU + +Many thanks to Jonathan Roy for bug reports and suggestions, to +Michael Schwern for other bug reports and patches, to Mike Cariaso for +helping me to figure out the Right Thing to Do About Expiration, to +Joshua Gerth, Joshua Chamas, Jonathan Roy, Mark D. Anderson, and +Andrew Johnson for more suggestions about expiration, to Ariel +Scolnikov for delightful messages about the Fibonacci function, to +Dion Almaer for thought-provoking suggestions about the default +normalizer, to Walt Mankowski and Kurt Starsinic for much help +investigating problems under threaded Perl, to Alex Dudkevich for +reporting the bug in prototyped functions and for checking my patch, +to Tony Bass for many helpful suggestions, to Philippe Verdret for +enlightening discussion of Hook::PrePostCall, to Nat Torkington for +advice I ignored, to Chris Nandor for portability advice, and to Jenda +Krynicky for being a light in the world. + +=cut + diff --git a/lib/Memoize/SDBM_File.pm b/lib/Memoize/SDBM_File.pm new file mode 100644 index 0000000000..46e550f9a2 --- /dev/null +++ b/lib/Memoize/SDBM_File.pm @@ -0,0 +1,63 @@ +package Memoize::SDBM_File; +use SDBM_File; +@ISA = qw(SDBM_File); + +$Verbose = 0; + +sub AUTOLOAD { + warn "Nonexistent function $AUTOLOAD invoked in Memoize::SDBM_File\n"; +} + +sub import { + warn "Importing Memoize::SDBM_File\n" if $Verbose; +} + + +my %keylist; + +# This is so ridiculous... +sub _backhash { + my $self = shift; + my %fakehash; + my $k; + for ($k = $self->FIRSTKEY(); defined $k; $k = $self->NEXTKEY($k)) { + $fakehash{$k} = undef; + } + $keylist{$self} = \%fakehash; +} + +sub EXISTS { + warn "Memoize::SDBM_File EXISTS (@_)\n" if $Verbose; + my $self = shift; + _backhash($self) unless exists $keylist{$self}; + my $r = exists $keylist{$self}{$_[0]}; + warn "Memoize::SDBM_File EXISTS (@_) ==> $r\n" if $Verbose; + $r; +} + +sub DEFINED { + warn "Memoize::SDBM_File DEFINED (@_)\n" if $Verbose; + my $self = shift; + _backhash($self) unless exists $keylist{$self}; + defined $keylist{$self}{$_[0]}; +} + +sub DESTROY { + warn "Memoize::SDBM_File DESTROY (@_)\n" if $Verbose; + my $self = shift; + delete $keylist{$self}; # So much for reference counting... + $self->SUPER::DESTROY(@_); +} + +# Maybe establish the keylist at TIEHASH time instead? + +sub STORE { + warn "Memoize::SDBM_File STORE (@_)\n" if $VERBOSE; + my $self = shift; + $keylist{$self}{$_[0]} = undef; + $self->SUPER::STORE(@_); +} + +# Inherit FETCH and TIEHASH + +1; diff --git a/lib/Memoize/Saves.pm b/lib/Memoize/Saves.pm new file mode 100644 index 0000000000..8738a810b2 --- /dev/null +++ b/lib/Memoize/Saves.pm @@ -0,0 +1,197 @@ +package Memoize::Saves; + +$DEBUG = 0; + +sub TIEHASH +{ + my ($package, %args) = @_; + my %cache; + + # Convert the CACHE to a referenced hash for quick lookup + # + if( $args{CACHE} ) + { + my %hash; + $args{CACHE} = [ $args{CACHE} ] unless ref $args{CACHE} eq "ARRAY"; + foreach my $value ( @{$args{CACHE}} ) + { + $hash{$value} = 1; + } + $args{CACHE} = \%hash; + } + + # Convert the DUMP list to a referenced hash for quick lookup + # + if( $args{DUMP} ) + { + my %hash; + $args{DUMP} = [ $args{DUMP} ] unless ref $args{DUMP} eq "ARRAY"; + foreach my $value ( @{$args{DUMP}} ) + { + $hash{$value} = 1; + } + $args{DUMP} = \%hash; + } + + if ($args{TIE}) + { + my ($module, @opts) = @{$args{TIE}}; + my $modulefile = $module . '.pm'; + $modulefile =~ s{::}{/}g; + eval { require $modulefile }; + if ($@) { + die "Memoize::Saves: Couldn't load hash tie module `$module': $@; aborting"; + } + my $rc = (tie %cache => $module, @opts); + unless ($rc) { + die "Memoize::Saves: Couldn't tie hash to `$module': $@; aborting"; + } + } + + $args{C} = \%cache; + bless \%args => $package; +} + +sub EXISTS +{ + my $self = shift; + my $key = shift; + + if( exists $self->{C}->{$key} ) + { + return 1; + } + + return 0; +} + + +sub FETCH +{ + my $self = shift; + my $key = shift; + + return $self->{C}->{$key}; +} + +sub STORE +{ + my $self = shift; + my $key = shift; + my $value = shift; + + # If CACHE defined and this is not in our list don't save it + # + if(( defined $self->{CACHE} )&& + ( ! defined $self->{CACHE}->{$value} )) + { + print "$value not in CACHE list.\n" if $DEBUG; + return; + } + + # If DUMP is defined and this is in our list don't save it + # + if(( defined $self->{DUMP} )&& + ( defined $self->{DUMP}->{$value} )) + { + print "$value in DUMP list.\n" if $DEBUG; + return; + } + + # If REGEX is defined we will store it only if its true + # + if(( defined $self->{REGEX} )&& + ( $value !~ /$self->{REGEX}/ )) + { + print "$value did not match regex.\n" if $DEBUG; + return; + } + + # If we get this far we should save the value + # + print "Saving $key:$value\n" if $DEBUG; + $self->{C}->{$key} = $value; +} + +1; + +# Documentation +# + +=head1 NAME + +Memoize::Saves - Plug-in module to specify which return values should be memoized + +=head1 SYNOPSIS + + use Memoize; + + memoize 'function', + SCALAR_CACHE => [TIE, Memoize::Saves, + CACHE => [ "word1", "word2" ], + DUMP => [ "word3", "word4" ], + REGEX => "Regular Expression", + TIE => [Module, args...], + ], + +=head1 DESCRIPTION + +Memoize::Saves is a plug-in module for Memoize. It allows the +user to specify which values should be cached or which should be +dumped. Please read the manual for Memoize for background +information. + +Use the CACHE option to specify a list of return values which should +be memoized. All other values will need to be recomputed each time. + +Use the DUMP option to specify a list of return values which should +not be memoized. Only these values will need to be recomputed each +time. + +Use the REGEX option to specify a Regular Expression which must match +for the return value to be saved. You can supply either a plain text +string or a compiled regular expression using qr//. Obviously the +second method is prefered. + +Specifying multiple options will result in the least common denominator +being saved. + +You can use the TIE option to string multiple Memoize Plug-ins together: + + +memoize ('printme', + SCALAR_CACHE => + [TIE, Memoize::Saves, + REGEX => qr/my/, + TIE => [Memoize::Expire, + LIFETIME => 5, + TIE => [ GDBM_File, $filename, + O_RDWR | O_CREAT, 0666] + ] + ] + ); + + +=head1 CAVEATS + +This module is experimental, and may contain bugs. Please report bugs +to the address below. + +If you are going to use Memoize::Saves with Memoize::Expire it is +import to use it in that order. Memoize::Expire changes the return +value to include expire information and it may no longer match +your CACHE, DUMP, or REGEX. + + +=head1 AUTHOR + +Joshua Gerth <gerth@teleport.com> + +=head1 SEE ALSO + +perl(1) + +The Memoize man page. + + + diff --git a/lib/Memoize/Storable.pm b/lib/Memoize/Storable.pm new file mode 100644 index 0000000000..ff712aecc4 --- /dev/null +++ b/lib/Memoize/Storable.pm @@ -0,0 +1,61 @@ + +package Memoize::Storable; +use Storable (); +$Verbose = 0; + +sub TIEHASH { + require Carp if $Verbose; + my $package = shift; + my $filename = shift; + my $truehash = (-e $filename) ? Storable::retrieve($filename) : {}; + my %options; + print STDERR "Memoize::Storable::TIEHASH($filename, @_)\n" if $Verbose; + @options{@_} = (); + my $self = + {FILENAME => $filename, + H => $truehash, + OPTIONS => \%options + }; + bless $self => $package; +} + +sub STORE { + require Carp if $Verbose; + my $self = shift; + print STDERR "Memoize::Storable::STORE(@_)\n" if $Verbose; + $self->{H}{$_[0]} = $_[1]; +} + +sub FETCH { + require Carp if $Verbose; + my $self = shift; + print STDERR "Memoize::Storable::FETCH(@_)\n" if $Verbose; + $self->{H}{$_[0]}; +} + +sub EXISTS { + require Carp if $Verbose; + my $self = shift; + print STDERR "Memoize::Storable::EXISTS(@_)\n" if $Verbose; + exists $self->{H}{$_[0]}; +} + +sub DESTROY { + require Carp if $Verbose; + my $self= shift; + print STDERR "Memoize::Storable::DESTROY(@_)\n" if $Verbose; + if ($self->{OPTIONS}{'nstore'}) { + Storable::nstore($self->{H}, $self->{FILENAME}); + } else { + Storable::store($self->{H}, $self->{FILENAME}); + } +} + +sub FIRSTKEY { + 'Fake hash from Memoize::Storable'; +} + +sub NEXTKEY { + undef; +} +1; diff --git a/lib/Memoize/TODO b/lib/Memoize/TODO new file mode 100644 index 0000000000..db0843b2a9 --- /dev/null +++ b/lib/Memoize/TODO @@ -0,0 +1,335 @@ +# Version 0.05 alpha $Revision: 1.5 $ $Date: 1999/09/17 14:57:55 $ + +=head1 TO DO + +=over 4 + +=item * + +LIST_CACHE doesn't work with ties to most DBM implementations, because +Memouze tries to save a listref, and DB_File etc. can only store +strings. This should at least be documented. Maybe Memoize could +detect the problem at TIE time and throw a fatal error. + +Try out MLDBM here and document it if it works. + +=item * + +We should extend the benchmarking module to allow + + timethis(main, { MEMOIZED => [ suba, subb ] }) + +What would this do? It would time C<main> three times, once with +C<suba> and C<subb> unmemoized, twice with them memoized. + +Why would you want to do this? By the third set of runs, the memo +tables would be fully populated, so all calls by C<main> to C<suba> +and C<subb> would return immediately. You would be able to see how +much of C<main>'s running time was due to time spent computing in +C<suba> and C<subb>. If that was just a little time, you would know +that optimizing or improving C<suba> and C<subb> would not have a +large effect on the performance of C<main>. But if there was a big +difference, you would know that C<suba> or C<subb> was a good +candidate for optimization if you needed to make C<main> go faster. + +Done. + +=item * + +Perhaps C<memoize> should return a reference to the original function +as well as one to the memoized version? But the programmer could +always construct such a reference themselves, so perhaps it's not +necessary. We save such a reference anyway, so a new package method +could return it on demand even if it wasn't provided by C<memoize>. +We could even bless the new function reference so that it could have +accessor methods for getting to the original function, the options, +the memo table, etc. + +Naah. + +=item * + +The TODISK feature is not ready yet. It will have to be rather +complicated, providing options for which disk method to use (GDBM? +DB_File? Flat file? Storable? User-supplied?) and which stringizing +method to use (FreezeThaw? Marshal? User-supplied?) + +Done! + +=item * + +Maybe an option for automatic expiration of cache values? (`After one +day,' `After five uses,' etc.) Also possibly an option to limit the +number of active entries with automatic LRU expiration. + +You have a long note to Mike Cariaso that outlines a good approach +that you sent on 9 April 1999. + +What's the timeout stuff going to look like? + + EXPIRE_TIME => time_in_sec + EXPIRE_USES => num_uses + MAXENTRIES => n + +perhaps? Is EXPIRE_USES actually useful? + +19990916: Memoize::Expire does EXPIRE_TIME and EXPIRE_USES. +MAXENTRIES can come later as a separate module. + +=item * + +Put in a better example than C<fibo>. Show an example of a +nonrecursive function that simply takes a long time to run. +C<getpwuid> for example? But this exposes the bug that you can't say +C<memoize('getpwuid')>, so perhaps it's not a very good example. + +Well, I did add the ColorToRGB example, but it's still not so good. +These examples need a lot of work. C<factorial> might be a better +example than C<fibo>. + +=item * + +Add more regression tests for normalizers. + +=item * + +Maybe resolve normalizer function to code-ref at memoize time instead +of at function call time for efficiency? I think there was some +reason not to do this, but I can't remember what it was. + +=item * + +Add more array value tests to the test suite. + +Does it need more now? + +=item * + +Fix that `Subroutine u redefined ... line 484' message. + +Fixed, I think. + +=item * + +Get rid of any remaining *{$ref}{CODE} or similar magic hashes. + +=item * + +There should be an option to dump out the memoized values or to +otherwise traverse them. + +What for? + +Maybe the tied hash interface taskes care of this anyway? + +=item * + +Include an example that caches DNS lookups. + +=item * + +Make tie for Storable (Memoize::Storable) + +A prototype of Memoize::Storable is finished. Test it and add to the +test suite. + +Done. + +=item * + +Make tie for DBI (Memoize::DBI) + +=item * + +I think there's a bug. See `###BUG'. + +=item * + +Storable probably can't be done, because it doesn't allow updating. +Maybe a different interface that supports readonly caches fronted by a +writable in-memory cache? A generic tied hash maybe? + + FETCH { + if (it's in the memory hash) { + return it + } elsif (it's in the readonly disk hash) { + return it + } else { + not-there + } + } + + STORE { + put it into the in-memory hash + } + +Maybe `save' and `restore' methods? + +It isn't working right because the destructor doesn't get called at +the right time. + +This is fixed. `use strict vars' would have caught it immediately. Duh. + +=item * + +Don't forget about generic interface to Storable-like packages + +=item * + + +Maybe add in TODISK after all, with TODISK => 'filename' equivalent to + + SCALAR_CACHE => [TIE, Memoize::SDBM_File, $filename, O_RDWR|O_CREAT, 0666], + LIST_CACHE => MERGE + +=item * + +Maybe the default for LIST_CACHE should be MERGE anyway. + +=item * + +There's some terrible bug probably related to use under threaded perl, +possibly connected with line 56: + + my $wrapper = eval "sub { unshift \@_, qq{$cref}; goto &_memoizer; }"; + +I think becayse C<@_> is lexically scoped in threadperl, the effect of +C<unshift> never makes it into C<_memoizer>. That's probably a bug in +Perl, but maybe I should work around it. Can anyone provide more +information here, or lend me a machine with threaded Perl where I can +test this theory? Line 59, currently commented out, may fix the +problem. + +=item * + +Maybe if the original function has a prototype, the module can use +that to select the most appropriate default normalizer. For example, +if the prototype was C<($)>, there's no reason to use `join'. If it's +C<(\@)> then it can use C<join $;,@$_[0];> instead of C<join $;,@_;>. + +=item * + +Ariel Scolnikov suggests using the change counting problem as an +example. (How many ways to make change of a dollar?) + +=item * + +I found a use for `unmemoize'. If you're using the Storable glue, and +your program gets SIGINT, you find that the cache data is not in the +cache, because Perl normally writes it all out at once from a +DESTROY method, and signals skip DESTROY processing. So you could add + + $sig{INT} = sub { unmemoize ... }; + +(Jonathan Roy pointed this out) + +=item * + +This means it would be useful to have a method to return references to +all the currently-memoized functions so that you could say + + $sig{INT} = sub { for $f (Memoize->all_memoized) { + unmemoize $f; + } + } + + +=item * + +19990917 There should be a call you can make to get back the cache +itself. If there were, then you could delete stuff from it to +manually expire data items. + +=item * + +19990925 Randal says that the docs for Memoize;:Expire should make it +clear that the expired entries are never flushed all at once. He +asked if you would need to do that manually. I said: + + Right, if that's what you want. If you have EXISTS return false, + it'll throw away the old cached item and replace it in the cache + with a new item. But if you want the cache to actually get smaller, + you have to do that yourself. + + I was planning to build an Expire module that implemented an LRU + queue and kept the cache at a constant fixed size, but I didn't get + to it yet. It's not clear to me that the automatic exptynig-out + behavior is very useful anyway. The whole point of a cache is to + trade space for time, so why bother going through the cache to throw + away old items before you need to? + +Randal then pointed out that it could discard expired items at DESTRoY +or TIEHASH time, which seemed like a good idea, because if the cache +is on disk you might like to keep it as small as possible. + +=item * + +19991219 Philip Gwyn suggests this technique: You have a load_file +function that memoizes the file contexts. But then if the file +changes you get the old contents. So add a normalizer that does + + return join $;, (stat($_[0])[9]), $_[0]; + +Now when the modification date changes, the true key returned by the +normalizer is different, so you get a cache miss and it loads the new +contents. Disadvantage: The old contents are still in the cache. I +think it makes more sense to have a special expiration manager for +this. Make one up and bundle it. + +19991220 I have one written: Memoize::ExpireFile. But how can you +make this work when the function might have several arguments, of +which some are filenames and some aren't? + +=item * + +19991219 There should be an inheritable TIEHASH method that does the +argument processing properly. + +19991220 Philip Gwyn contributed a patch for this. + +20001231 You should really put this in. Jonathan Roy uncovered a +problem that it will be needed to solve. Here's the problem: He has: + + memoize "get_items", + LIST_CACHE => ["TIE", "Memoize::Expire", + LIFETIME => 86400, + TIE => ["DB_File", "debug.db", O_CREAT|O_RDWR, 0666] + ]; + +This won't work, because memoize is trying to store listrefs in a +DB_File. He owuld have gotten a fatal error if he had done this: + + memoize "get_items", + LIST_CACHE => ["TIE", "DB_File", "debug.db", O_CREAT|O_RDWR, 0666]' + + +But in this case, he tied the cache to Memoize::Expire, which is *not* +scalar-only, and the check for scalar-only ties is missing from +Memoize::Expire. The inheritable method can take care of this. + +=item * + +20001130 Custom cache manager that checks to make sure the function +return values actually match the memoized values. + +=item * + +20001231 Expiration manager that watches cache performance and +accumulates statistics. Variation: Have it automatically unmemoize +the function if performance is bad. + +=item * + +20010517 Option to have normalizer *modify* @_ for use by memoized +function. This would save code and time in cases like the one in the +manual under 'NORMALIZER', where both f() and normalize_f() do the +same analysis and make the same adjustments to the hash. If the +normalizer could make the adjustments and save the changes in @_, you +wouldn't have to do it twice. + +=item * +There was probably some other stuff that I forgot. + + + +=back diff --git a/lib/Memoize/t/array.t b/lib/Memoize/t/array.t new file mode 100755 index 0000000000..b7057ea58a --- /dev/null +++ b/lib/Memoize/t/array.t @@ -0,0 +1,68 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + + +print "1..11\n"; + +sub timelist { + return (time) x $_[0]; +} + +memoize('timelist'); + +@t1 = &timelist(1); +sleep 2; +@u1 = &timelist(1); +print ((("@t1" eq "@u1") ? '' : 'not '), "ok 1\n"); + +@t7 = &timelist(7); +print (((@t7 == 7) ? '' : 'not '), "ok 2\n"); +$BAD = 0; +for ($i = 1; $i < 7; $i++) { + $BAD++ unless $t7[$i-1] == $t7[$i]; +} +print (($BAD ? 'not ' : ''), "ok 3\n"); + +sleep 2; +@u7 = &timelist(7); +print (((@u7 == 7) ? '' : 'not '), "ok 4\n"); +$BAD = 0; +for ($i = 1; $i < 7; $i++) { + $BAD++ unless $u7[$i-1] == $u7[$i]; +} +print (($BAD ? 'not ' : ''), "ok 5\n"); +# Properly memoized? +print ((("@t7" eq "@u7") ? '' : 'not '), "ok 6\n"); + +sub con { + return wantarray() +} + +# Same arguments yield different results in different contexts? +memoize('con'); +$s = con(1); +@a = con(1); +print ((($s == $a[0]) ? 'not ' : ''), "ok 7\n"); + +# Context propagated correctly? +print ((($s eq '') ? '' : 'not '), "ok 8\n"); # Scalar context +print ((("@a" eq '1' && @a == 1) ? '' : 'not '), "ok 9\n"); # List context + +# Context propagated correctly to normalizer? +sub n { + my $arg = shift; + my $test = shift; + if (wantarray) { + print ((($arg eq ARRAY) ? '' : 'not '), "ok $test\n"); # List context + } else { + print ((($arg eq SCALAR) ? '' : 'not '), "ok $test\n"); # Scalar context + } +} + +sub f { 1 } +memoize('f', NORMALIZER => 'n'); +$s = f('SCALAR', 10); # Test 10 +@a = f('ARRAY' , 11); # Test 11 + diff --git a/lib/Memoize/t/correctness.t b/lib/Memoize/t/correctness.t new file mode 100755 index 0000000000..ae56787255 --- /dev/null +++ b/lib/Memoize/t/correctness.t @@ -0,0 +1,129 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +print "1..25\n"; + +print "# Basic\n"; + +# A function that should only be called once. +{ my $COUNT = 0; + sub no_args { + $FAIL++ if $COUNT++; + 11; + } +} + +# +memoize('no_args'); + +$c1 = &no_args(); +print (($c1 == 11) ? "ok 1\n" : "not ok 1\n"); +$c2 = &no_args(); +print (($c2 == 11) ? "ok 2\n" : "not ok 2\n"); +print $FAIL ? "not ok 3\n" : "ok 3\n"; # Was it really memoized? + +$FAIL = 0; +$f = do { my $COUNT = 0; sub { $FAIL++ if $COUNT++; 12 } }; +$fm = memoize($f); + +$c1 = &$fm(); +print (($c1 == 12) ? "ok 4\n" : "not ok 4\n"); +$c2 = &$fm(); +print (($c2 == 12) ? "ok 5\n" : "not ok 5\n"); +print $FAIL ? "not ok 6\n" : "ok 6\n"; # Was it really memoized? + +$f = do { my $COUNT = 0; sub { $FAIL++ if $COUNT++; 13 } }; +$fm = memoize($f, INSTALL => 'another'); + +$c1 = &another(); # Was it really installed? +print (($c1 == 13) ? "ok 7\n" : "not ok 7\n"); +$c2 = &another(); +print (($c2 == 13) ? "ok 8\n" : "not ok 8\n"); +print $FAIL ? "not ok 9\n" : "ok 9\n"; # Was it really memoized? +$c3 = &$fm(); # Call memoized version through returned ref +print (($c3 == 13) ? "ok 10\n" : "not ok 10\n"); +print $FAIL ? "not ok 11\n" : "ok 11\n"; # Was it really memoized? +$c4 = &$f(); # Call original version again +print (($c4 == 13) ? "ok 12\n" : "not ok 12\n"); +print $FAIL ? "ok 13\n" : "not ok 13\n"; # Did we get the original? + +print "# Fibonacci\n"; + +sub mt1 { # Fibonacci + my $n = shift; + return $n if $n < 2; + mt1($n-1) + mt2($n-2); +} +sub mt2 { + my $n = shift; + return $n if $n < 2; + mt1($n-1) + mt2($n-2); +} + +@f1 = map { mt1($_) } (0 .. 15); +@f2 = map { mt2($_) } (0 .. 15); +memoize('mt1'); +@f3 = map { mt1($_) } (0 .. 15); +@f4 = map { mt1($_) } (0 .. 15); +@arrays = (\@f1, \@f2, \@f3, \@f4); +$n = 13; +for ($i=0; $i<3; $i++) { + for ($j=$i+1; $j<3; $j++) { + $n++; + print ((@{$arrays[$i]} == @{$arrays[$j]}) ? "ok $n\n" : "not ok $n\n"); + $n++; + for ($k=0; $k < @{$arrays[$i]}; $k++) { + (print "not ok $n\n", next) if $arrays[$i][$k] != $arrays[$j][$k]; + } + print "ok $n\n"; + } +} + + + +print "# Normalizers\n"; + +sub fake_normalize { + return ''; +} + +sub f1 { + return shift; +} +sub f2 { + return shift; +} +sub f3 { + return shift; +} +&memoize('f1'); +&memoize('f2', NORMALIZER => 'fake_normalize'); +&memoize('f3', NORMALIZER => \&fake_normalize); +@f1r = map { f1($_) } (1 .. 10); +@f2r = map { f2($_) } (1 .. 10); +@f3r = map { f3($_) } (1 .. 10); +$n++; +print (("@f1r" eq "1 2 3 4 5 6 7 8 9 10") ? "ok $n\n" : "not ok $n\n"); +$n++; +print (("@f2r" eq "1 1 1 1 1 1 1 1 1 1") ? "ok $n\n" : "not ok $n\n"); +$n++; +print (("@f3r" eq "1 1 1 1 1 1 1 1 1 1") ? "ok $n\n" : "not ok $n\n"); + +print "# INSTALL => undef option.\n"; +{ my $i = 1; + sub u1 { $i++ } +} +my $um = memoize('u1', INSTALL => undef); +@umr = (&$um, &$um, &$um); +@u1r = (&u1, &u1, &u1 ); # Did *not* clobber &u1 +$n++; +print (("@umr" eq "1 1 1") ? "ok $n\n" : "not ok $n\n"); # Increment once +$n++; +print (("@u1r" eq "2 3 4") ? "ok $n\n" : "not ok $n\n"); # Increment thrice +$n++; +print ((defined &{"undef"}) ? "not ok $n\n" : "ok $n\n"); # Just in case + +print "# $n tests in all.\n"; + diff --git a/lib/Memoize/t/errors.t b/lib/Memoize/t/errors.t new file mode 100755 index 0000000000..4c74954855 --- /dev/null +++ b/lib/Memoize/t/errors.t @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; +use Config; + +print "1..11\n"; + +eval { memoize({}) }; +print $@ ? "ok 1\n" : "not ok 1 # $@\n"; + +eval { memoize([]) }; +print $@ ? "ok 2\n" : "not ok 2 # $@\n"; + +eval { my $x; memoize(\$x) }; +print $@ ? "ok 3\n" : "not ok 3 # $@\n"; + +# 4--8 +$n = 4; +for $mod (qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File)) { + eval { memoize(sub {}, LIST_CACHE => ['TIE', $mod]) }; + print $@ ? "ok $n\n" : "not ok $n # $@\n"; + $n++; +} + +# 9 +eval { memoize(sub {}, LIST_CACHE => ['TIE', WuggaWugga]) }; +print $@ ? "ok 9\n" : "not ok 9 # $@\n"; + +# 10 +eval { memoize(sub {}, LIST_CACHE => 'YOB GORGLE') }; +print $@ ? "ok 10\n" : "not ok 10 # $@\n"; + +# 11 +eval { memoize(sub {}, SCALAR_CACHE => ['YOB GORGLE']) }; +print $@ ? "ok 11\n" : "not ok 11 # $@\n"; + diff --git a/lib/Memoize/t/expire.t b/lib/Memoize/t/expire.t new file mode 100644 index 0000000000..28cf559391 --- /dev/null +++ b/lib/Memoize/t/expire.t @@ -0,0 +1,70 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; +use Memoize::ExpireTest; + +my $n = 0; + +print "1..17\n"; + +$n++; print "ok $n\n"; + +my %CALLS; +sub id { + my($arg) = @_; + ++$CALLS{$arg}; + $arg; +} + +memoize 'id', SCALAR_CACHE => ['TIE', 'Memoize::ExpireTest'], + LIST_CACHE => 'FAULT'; +$n++; print "ok $n\n"; + +for $i (1, 2, 3, 1, 2, 1) { + $n++; + unless ($i == id($i)) { + print "not "; + } + print "ok $n\n"; +} + +for $i (1, 2, 3) { + $n++; + unless ($CALLS{$i} == 1) { + print "not "; + } + print "ok $n\n"; +} + +Memoize::ExpireTest::expire(1); + +for $i (1, 2, 3) { + my $v = id($i); +} + +for $i (1, 2, 3) { + $n++; + unless ($CALLS{$i} == 1 + ($i == 1)) { + print "not "; + } + print "ok $n\n"; +} + +Memoize::ExpireTest::expire(1); +Memoize::ExpireTest::expire(2); + +for $i (1, 2, 3) { + my $v = id($i); +} + +for $i (1, 2, 3) { + $n++; + unless ($CALLS{$i} == 4 - $i) { + print "not "; + } + print "ok $n\n"; +} + +exit 0; + diff --git a/lib/Memoize/t/expire_file.t b/lib/Memoize/t/expire_file.t new file mode 100644 index 0000000000..c6abb507ea --- /dev/null +++ b/lib/Memoize/t/expire_file.t @@ -0,0 +1,64 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +my $n = 0; + + +if (-e '.fast') { + print "1..0\n"; + exit 0; +} + +print "1..11\n"; + +++$n; print "ok $n\n"; + +my $READFILE_CALLS = 0; +my $FILE = './TESTFILE'; + +sub writefile { + my $FILE = shift; + open F, "> $FILE" or die "Couldn't write temporary file $FILE: $!"; + print F scalar(localtime), "\n"; + close F; +} + +sub readfile { + $READFILE_CALLS++; + my $FILE = shift; + open F, "< $FILE" or die "Couldn't write temporary file $FILE: $!"; + my $data = <F>; + close F; + $data; +} + +memoize 'readfile', + SCALAR_CACHE => ['TIE', 'Memoize::ExpireFile', ], + LIST_CACHE => 'FAULT' + ; + +++$n; print "ok $n\n"; + +writefile($FILE); +++$n; print "ok $n\n"; +sleep 1; + +my $t1 = readfile($FILE); +++$n; print "ok $n\n"; +++$n; print ((($READFILE_CALLS == 1) ? '' : 'not '), "ok $n\n"); + +my $t2 = readfile($FILE); +++$n; print "ok $n\n"; +++$n; print ((($READFILE_CALLS == 1) ? '' : 'not '), "ok $n\n"); +++$n; print ((($t1 eq $t2) ? '' : 'not '), "ok $n\n"); + +sleep 2; +writefile($FILE); +my $t3 = readfile($FILE); +++$n; print "ok $n\n"; +++$n; print ((($READFILE_CALLS == 2) ? '' : 'not '), "ok $n\n"); +++$n; print ((($t1 ne $t3) ? '' : 'not '), "ok $n\n"); + +END { 1 while unlink 'TESTFILE' } diff --git a/lib/Memoize/t/expire_module_n.t b/lib/Memoize/t/expire_module_n.t new file mode 100644 index 0000000000..b6b4521f7b --- /dev/null +++ b/lib/Memoize/t/expire_module_n.t @@ -0,0 +1,60 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +my $n = 0; + + +print "1..21\n"; + +++$n; print "ok $n\n"; + +$RETURN = 1; + +%CALLS = (); +sub call { +# print "CALL $_[0] => $RETURN\n"; + ++$CALLS{$_[0]}; + $RETURN; +} + +memoize 'call', + SCALAR_CACHE => ['TIE', 'Memoize::Expire', NUM_USES => 2], + LIST_CACHE => 'FAULT'; + +# $Memoize::Expire::DEBUG = 1; +++$n; print "ok $n\n"; + +# 3--6 +for (0,1,2,3) { + print "not " unless call($_) == 1; + ++$n; print "ok $n\n"; +} + +# 7--10 +for (keys %CALLS) { + print "not " unless $CALLS{$_} == (1,1,1,1)[$_]; + ++$n; print "ok $n\n"; +} + +# 11--13 +$RETURN = 2; +++$n; print ((call(1) == 1 ? '' : 'not '), "ok $n\n"); # 1 expires +++$n; print ((call(1) == 2 ? '' : 'not '), "ok $n\n"); # 1 gets new val +++$n; print ((call(2) == 1 ? '' : 'not '), "ok $n\n"); # 2 expires + +# 14--17 +$RETURN = 3; +for (0,1,2,3) { + # 0 expires, 1 expires, 2 gets new val, 3 expires + print "not " unless call($_) == (1,2,3,1)[$_]; + ++$n; print "ok $n\n"; +} + +for (0,1,2,3) { + print "not " unless $CALLS{$_} == (1,2,2,1)[$_]; + ++$n; print "ok $n\n"; +} + + diff --git a/lib/Memoize/t/expire_module_t.t b/lib/Memoize/t/expire_module_t.t new file mode 100644 index 0000000000..22d64e82d5 --- /dev/null +++ b/lib/Memoize/t/expire_module_t.t @@ -0,0 +1,84 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +my $n = 0; + +if (-e '.fast') { + print "1..0\n"; + exit 0; +} + +print "# Warning: I'm testing the timed expiration policy.\nThis will take about thirty seconds.\n"; + +print "1..14\n"; + +++$n; print "ok $n\n"; + +sub close_enough { +# print "Close enough? @_[0,1]\n"; + abs($_[0] - $_[1]) <= 1; +} + +sub now { +# print "NOW: @_ ", time(), "\n"; + time; +} + +memoize 'now', + SCALAR_CACHE => ['TIE', 'Memoize::Expire', LIFETIME => 15], + LIST_CACHE => 'FAULT' + ; + +++$n; print "ok $n\n"; + + +# T +for (1,2,3) { + $when{$_} = now($_); + ++$n; + print "not " unless $when{$_} == time; + print "ok $n\n"; + sleep 5 if $_ < 3; +} + +# T+10 +for (1,2,3) { + $again{$_} = now($_); # Should be the sameas before, because of memoization +} + +# T+10 +foreach (1,2,3) { + ++$n; + print "not " unless $when{$_} == $again{$_}; + print "ok $n\n"; +} + +sleep 6; # now(1) expires + +# T+16 +print "not " unless close_enough(time, $again{1} = now(1)); +++$n; print "ok $n\n"; + +# T+16 +foreach (2,3) { # Have not expired yet. + ++$n; + print "not " unless now($_) == $again{$_}; + print "ok $n\n"; +} + +sleep 6; # now(2) expires + +# T+22 +print "not " unless close_enough(time, $again{2} = now(2)); +++$n; print "ok $n\n"; + +# T+22 +foreach (1,3) { + ++$n; + print "not " unless now($_) == $again{$_}; + print "ok $n\n"; +} + + diff --git a/lib/Memoize/t/flush.t b/lib/Memoize/t/flush.t new file mode 100644 index 0000000000..bf9262ec7c --- /dev/null +++ b/lib/Memoize/t/flush.t @@ -0,0 +1,42 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize 'flush_cache', 'memoize'; +print "1..8\n"; +print "ok 1\n"; + + + +my $V = 100; +sub VAL { $V } + +memoize 'VAL'; +print "ok 2\n"; + +my $c1 = VAL(); +print (($c1 == 100) ? "ok 3\n" : "not ok 3\n"); + +$V = 200; +$c1 = VAL(); +print (($c1 == 100) ? "ok 4\n" : "not ok 4\n"); + +flush_cache('VAL'); +$c1 = VAL(); +print (($c1 == 200) ? "ok 5\n" : "not ok 5\n"); + +$V = 300; +$c1 = VAL(); +print (($c1 == 200) ? "ok 6\n" : "not ok 6\n"); + +flush_cache(\&VAL); +$c1 = VAL(); +print (($c1 == 300) ? "ok 7\n" : "not ok 7\n"); + +$V = 400; +$c1 = VAL(); +print (($c1 == 300) ? "ok 8\n" : "not ok 8\n"); + + + + + diff --git a/lib/Memoize/t/normalize.t b/lib/Memoize/t/normalize.t new file mode 100755 index 0000000000..a920ff4b30 --- /dev/null +++ b/lib/Memoize/t/normalize.t @@ -0,0 +1,57 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +print "1..7\n"; + + +sub n_null { '' } + +{ my $I = 0; + sub n_diff { $I++ } +} + +{ my $I = 0; + sub a1 { $I++; "$_[0]-$I" } + my $J = 0; + sub a2 { $J++; "$_[0]-$J" } + my $K = 0; + sub a3 { $K++; "$_[0]-$K" } +} + +my $a_normal = memoize('a1', INSTALL => undef); +my $a_nomemo = memoize('a2', INSTALL => undef, NORMALIZER => 'n_diff'); +my $a_allmemo = memoize('a3', INSTALL => undef, NORMALIZER => 'n_null'); + +@ARGS = (1, 2, 3, 2, 1); + +@res = map { &$a_normal($_) } @ARGS; +print ((("@res" eq "1-1 2-2 3-3 2-2 1-1") ? '' : 'not '), "ok 1\n"); + +@res = map { &$a_nomemo($_) } @ARGS; +print ((("@res" eq "1-1 2-2 3-3 2-4 1-5") ? '' : 'not '), "ok 2\n"); + +@res = map { &$a_allmemo($_) } @ARGS; +print ((("@res" eq "1-1 1-1 1-1 1-1 1-1") ? '' : 'not '), "ok 3\n"); + + + +# Test fully-qualified name and installation +$COUNT = 0; +sub parity { $COUNT++; $_[0] % 2 } +sub parnorm { $_[0] % 2 } +memoize('parity', NORMALIZER => 'main::parnorm'); +@res = map { &parity($_) } @ARGS; +print ((("@res" eq "1 0 1 0 1") ? '' : 'not '), "ok 4\n"); +print (( ($COUNT == 2) ? '' : 'not '), "ok 5\n"); + +# Test normalization with reference to normalizer function +$COUNT = 0; +sub par2 { $COUNT++; $_[0] % 2 } +memoize('par2', NORMALIZER => \&parnorm); +@res = map { &par2($_) } @ARGS; +print ((("@res" eq "1 0 1 0 1") ? '' : 'not '), "ok 6\n"); +print (( ($COUNT == 2) ? '' : 'not '), "ok 7\n"); + + diff --git a/lib/Memoize/t/prototype.t b/lib/Memoize/t/prototype.t new file mode 100644 index 0000000000..f3859e329d --- /dev/null +++ b/lib/Memoize/t/prototype.t @@ -0,0 +1,36 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; +$EXPECTED_WARNING = '(no warning expected)'; + + +print "1..4\n"; + +sub q1 ($) { $_[0] + 1 } +sub q2 () { time } +sub q3 { join "--", @_ } + +$SIG{__WARN__} = \&handle_warnings; + +$RES = 'ok'; +memoize 'q1'; +print "$RES 1\n"; + +$RES = 'ok'; +memoize 'q2'; +print "$RES 2\n"; + +$RES = 'ok'; +memoize 'q3'; +print "$RES 3\n"; + +# Let's see if the prototype is actually honored +@q = (1..5); +$r = q1(@q); +print (($r == 6) ? '' : 'not ', "ok 4\n"); + +sub handle_warnings { + print $_[0]; + $RES = 'not ok' unless $_[0] eq $EXPECTED_WARNING; +} diff --git a/lib/Memoize/t/speed.t b/lib/Memoize/t/speed.t new file mode 100755 index 0000000000..d887aae60c --- /dev/null +++ b/lib/Memoize/t/speed.t @@ -0,0 +1,59 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize; + +if (-e '.fast') { + print "1..0\n"; + exit 0; +} + +print "# Warning: I'm testing the speedup. This might take up to sixty seconds.\n"; + +print "1..6\n"; + +sub fib { + my $n = shift; + $COUNT++; + return $n if $n < 2; + fib($n-1) + fib($n-2); +} + +$N = 0; + +$ELAPSED = 0; +until ($ELAPSED > 10) { + $N++; + my $start = time; + $COUNT=0; + $RESULT = fib($N); + $ELAPSED = time - $start; + print "# fib($N) took $ELAPSED seconds.\n" if $N % 1 == 0; +} + +print "# OK, fib($N) was slow enough; it took $ELAPSED seconds.\n"; + + +&memoize('fib'); + +$COUNT=0; +$start = time; +$RESULT2 = fib($N); +$ELAPSED2 = time - $start + .001; # prevent division by 0 errors + +print (($RESULT == $RESULT2) ? "ok 1\n" : "not ok 1\n"); +# If it's not ten times as fast, something is seriously wrong. +print (($ELAPSED/$ELAPSED2 > 10) ? "ok 2\n" : "not ok 2\n"); +# If it called the function more than $N times, it wasn't memoized properly +print (($COUNT > $N) ? "ok 3\n" : "not ok 3\n"); + +# Do it again. Should be even faster this time. +$start = time; +$RESULT2 = fib($N); +$ELAPSED2 = time - $start + .001; # prevent division by 0 errors + + +print (($RESULT == $RESULT2) ? "ok 4\n" : "not ok 4\n"); +print (($ELAPSED/$ELAPSED2 > 10) ? "ok 5\n" : "not ok 5\n"); +# This time it shouldn't have called the function at all. +print ($COUNT ? "ok 6\n" : "not ok 6\n"); diff --git a/lib/Memoize/t/tie.t b/lib/Memoize/t/tie.t new file mode 100755 index 0000000000..098fb050b5 --- /dev/null +++ b/lib/Memoize/t/tie.t @@ -0,0 +1,83 @@ +#!/usr/bin/perl + +use lib qw(. ..); +use Memoize 0.52 qw(memoize unmemoize); +use Fcntl; +use Memoize::AnyDBM_File; + +print "1..4\n"; + +sub i { + $_[0]; +} + +$ARG = 'Keith Bostic is a pinhead'; + +sub c119 { 119 } +sub c7 { 7 } +sub c43 { 43 } +sub c23 { 23 } +sub c5 { 5 } + +sub n { + $_[0]+1; +} + +$tmpdir = $ENV{TMP} || $ENV{TMPDIR} || '/tmp'; +if (eval {require File::Spec::Functions}) { + File::Spec::Functions->import(); +} else { + *catfile = sub { join '/', @_ }; +} +$file = catfile($tmpdir, "md$$"); +@files = ($file, "$file.db", "$file.dir", "$file.pag"); +{ + my @present = grep -e, @files; + if (@present && (@failed = grep { not unlink } @present)) { + warn "Can't unlink @failed! ($!)"; + } +} + + +tryout('Memoize::AnyDBM_File', $file, 1); # Test 1..4 +# tryout('DB_File', $file, 1); # Test 1..4 +unlink $file, "$file.dir", "$file.pag"; + +sub tryout { + my ($tiepack, $file, $testno) = @_; + + + memoize 'c5', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR | O_CREAT, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t1 = c5($ARG); + my $t2 = c5($ARG); + print (($t1 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t2 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c5'; + + # Now something tricky---we'll memoize c23 with the wrong table that + # has the 5 already cached. + memoize 'c23', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t3 = c23($ARG); + my $t4 = c23($ARG); + $testno++; + print (($t3 == 5) ? "ok $testno\n" : "not ok $testno # Result $t3\n"); + $testno++; + print (($t4 == 5) ? "ok $testno\n" : "not ok $testno # Result $t4\n"); + unmemoize 'c23'; +} + +{ + my @present = grep -e, @files; + if (@present && (@failed = grep { not unlink } @present)) { + warn "Can't unlink @failed! ($!)"; + } +} diff --git a/lib/Memoize/t/tie_gdbm.t b/lib/Memoize/t/tie_gdbm.t new file mode 100755 index 0000000000..cd3915459c --- /dev/null +++ b/lib/Memoize/t/tie_gdbm.t @@ -0,0 +1,71 @@ +#!/usr/bin/perl + +use lib qw(. ..); +use Memoize 0.45 qw(memoize unmemoize); +use Fcntl; + +sub i { + $_[0]; +} + +sub c119 { 119 } +sub c7 { 7 } +sub c43 { 43 } +sub c23 { 23 } +sub c5 { 5 } + +sub n { + $_[0]+1; +} + +eval {require GDBM_File}; +if ($@) { + print "1..0\n"; + exit 0; +} + +print "1..4\n"; + +if (eval {require File::Spec::Functions}) { + File::Spec::Functions->import(); +} else { + *catfile = sub { join '/', @_ }; +} +$tmpdir = $ENV{TMP} || $ENV{TMPDIR} || '/tmp'; +$file = catfile($tmpdir, "md$$"); +unlink $file, "$file.dir", "$file.pag"; +tryout('GDBM_File', $file, 1); # Test 1..4 +unlink $file, "$file.dir", "$file.pag"; + +sub tryout { + my ($tiepack, $file, $testno) = @_; + + + memoize 'c5', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR | O_CREAT, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t1 = c5(); + my $t2 = c5(); + print (($t1 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t2 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c5'; + + # Now something tricky---we'll memoize c23 with the wrong table that + # has the 5 already cached. + memoize 'c23', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t3 = c23(); + my $t4 = c23(); + $testno++; + print (($t3 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t4 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c23'; +} + diff --git a/lib/Memoize/t/tie_ndbm.t b/lib/Memoize/t/tie_ndbm.t new file mode 100644 index 0000000000..dfbd0f5858 --- /dev/null +++ b/lib/Memoize/t/tie_ndbm.t @@ -0,0 +1,74 @@ +#!/usr/bin/perl + +use lib qw(. ..); +use Memoize 0.45 qw(memoize unmemoize); +use Fcntl; +# use Memoize::NDBM_File; +# $Memoize::NDBM_File::Verbose = 0; + +sub i { + $_[0]; +} + +sub c119 { 119 } +sub c7 { 7 } +sub c43 { 43 } +sub c23 { 23 } +sub c5 { 5 } + +sub n { + $_[0]+1; +} + +eval {require Memoize::NDBM_File}; +if ($@) { + print "1..0\n"; + exit 0; +} + +print "1..4\n"; + + +if (eval {require File::Spec::Functions}) { + File::Spec::Functions->import(); +} else { + *catfile = sub { join '/', @_ }; +} +$tmpdir = $ENV{TMP} || $ENV{TMPDIR} || '/tmp'; +$file = catfile($tmpdir, "md$$"); +unlink $file, "$file.dir", "$file.pag"; +tryout('Memoize::NDBM_File', $file, 1); # Test 1..4 +unlink $file, "$file.dir", "$file.pag"; + +sub tryout { + my ($tiepack, $file, $testno) = @_; + + + memoize 'c5', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR | O_CREAT, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t1 = c5(); + my $t2 = c5(); + print (($t1 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t2 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c5'; + + # Now something tricky---we'll memoize c23 with the wrong table that + # has the 5 already cached. + memoize 'c23', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t3 = c23(); + my $t4 = c23(); + $testno++; + print (($t3 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t4 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c23'; +} + diff --git a/lib/Memoize/t/tie_sdbm.t b/lib/Memoize/t/tie_sdbm.t new file mode 100644 index 0000000000..c628d98c97 --- /dev/null +++ b/lib/Memoize/t/tie_sdbm.t @@ -0,0 +1,73 @@ +#!/usr/bin/perl + +use lib qw(. ..); +use Memoize 0.45 qw(memoize unmemoize); +use Fcntl; +# use Memoize::GDBM_File; +# $Memoize::GDBM_File::Verbose = 0; + +sub i { + $_[0]; +} + +sub c119 { 119 } +sub c7 { 7 } +sub c43 { 43 } +sub c23 { 23 } +sub c5 { 5 } + +sub n { + $_[0]+1; +} + +eval {require GDBM_File}; +if ($@) { + print "1..0\n"; + exit 0; +} + +print "1..4\n"; + +if (eval {require File::Spec::Functions}) { + File::Spec::Functions->import(); +} else { + *catfile = sub { join '/', @_ }; +} +$tmpdir = $ENV{TMP} || $ENV{TMPDIR} || '/tmp'; +$file = catfile($tmpdir, "md$$"); +unlink $file, "$file.dir", "$file.pag"; +tryout('GDBM_File', $file, 1); # Test 1..4 +unlink $file, "$file.dir", "$file.pag"; + +sub tryout { + my ($tiepack, $file, $testno) = @_; + + + memoize 'c5', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR | O_CREAT, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t1 = c5(); + my $t2 = c5(); + print (($t1 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t2 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c5'; + + # Now something tricky---we'll memoize c23 with the wrong table that + # has the 5 already cached. + memoize 'c23', + SCALAR_CACHE => ['TIE', $tiepack, $file, O_RDWR, 0666], + LIST_CACHE => 'FAULT' + ; + + my $t3 = c23(); + my $t4 = c23(); + $testno++; + print (($t3 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t4 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c23'; +} + diff --git a/lib/Memoize/t/tie_storable.t b/lib/Memoize/t/tie_storable.t new file mode 100644 index 0000000000..2dd77d0b4f --- /dev/null +++ b/lib/Memoize/t/tie_storable.t @@ -0,0 +1,76 @@ +#!/usr/bin/perl +# -*- mode: perl; perl-indent-level: 2 -*- + +use lib qw(. ..); +use Memoize 0.45 qw(memoize unmemoize); +# use Memoize::Storable; +# $Memoize::Storable::Verbose = 0; + +sub i { + $_[0]; +} + +sub c119 { 119 } +sub c7 { 7 } +sub c43 { 43 } +sub c23 { 23 } +sub c5 { 5 } + +sub n { + $_[0]+1; +} + +eval {require Storable}; +if ($@) { + print "1..0\n"; + exit 0; +} + +print "1..4\n"; + + +if (eval {require File::Spec::Functions}) { + File::Spec::Functions->import(); +} else { + *catfile = sub { join '/', @_ }; +} +$tmpdir = $ENV{TMP} || $ENV{TMPDIR} || '/tmp'; +$file = catfile($tmpdir, "storable$$"); +unlink $file; +tryout('Memoize::Storable', $file, 1); # Test 1..4 +unlink $file; + +sub tryout { + my ($tiepack, $file, $testno) = @_; + + + memoize 'c5', + SCALAR_CACHE => ['TIE', $tiepack, $file], + LIST_CACHE => 'FAULT' + ; + + my $t1 = c5(); + my $t2 = c5(); + print (($t1 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t2 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c5'; + 1; + 1; + + # Now something tricky---we'll memoize c23 with the wrong table that + # has the 5 already cached. + memoize 'c23', + SCALAR_CACHE => ['TIE', $tiepack, $file], + LIST_CACHE => 'FAULT' + ; + + my $t3 = c23(); + my $t4 = c23(); + $testno++; + print (($t3 == 5) ? "ok $testno\n" : "not ok $testno\n"); + $testno++; + print (($t4 == 5) ? "ok $testno\n" : "not ok $testno\n"); + unmemoize 'c23'; +} + diff --git a/lib/Memoize/t/tiefeatures.t b/lib/Memoize/t/tiefeatures.t new file mode 100755 index 0000000000..7306d9f4f8 --- /dev/null +++ b/lib/Memoize/t/tiefeatures.t @@ -0,0 +1,50 @@ +#!/usr/bin/perl + +use lib 'blib/lib'; +use Memoize 0.45 qw(memoize unmemoize); +use Fcntl; + +# print STDERR $INC{'Memoize.pm'}, "\n"; + +print "1..10\n"; + +# Test MERGE +sub xx { + wantarray(); +} + +my $s = xx(); +print ((!$s) ? "ok 1\n" : "not ok 1\n"); +my ($a) = xx(); +print (($a) ? "ok 2\n" : "not ok 2\n"); +memoize 'xx', LIST_CACHE => MERGE; +$s = xx(); +print ((!$s) ? "ok 3\n" : "not ok 3\n"); +($a) = xx(); # Should return cached false value from previous invocation +print ((!$a) ? "ok 4\n" : "not ok 4\n"); + + +# Test FAULT +sub ns {} +sub na {} +memoize 'ns', SCALAR_CACHE => FAULT; +memoize 'na', LIST_CACHE => FAULT; +eval { my $s = ns() }; # Should fault +print (($@) ? "ok 5\n" : "not ok 5\n"); +eval { my ($a) = na() }; # Should fault +print (($@) ? "ok 6\n" : "not ok 6\n"); + + +# Test HASH +my (%s, %l); +sub nul {} +memoize 'nul', SCALAR_CACHE => [HASH => \%s], LIST_CACHE => [HASH => \%l]; +nul('x'); +nul('y'); +print ((join '', sort keys %s) eq 'xy' ? "ok 7\n" : "not ok 7\n"); +print ((join '', sort keys %l) eq '' ? "ok 8\n" : "not ok 8\n"); +() = nul('p'); +() = nul('q'); +print ((join '', sort keys %s) eq 'xy' ? "ok 9\n" : "not ok 9\n"); +print ((join '', sort keys %l) eq 'pq' ? "ok 10\n" : "not ok 10\n"); + diff --git a/lib/Memoize/t/unmemoize.t b/lib/Memoize/t/unmemoize.t new file mode 100755 index 0000000000..82b318c645 --- /dev/null +++ b/lib/Memoize/t/unmemoize.t @@ -0,0 +1,26 @@ +#!/usr/bin/perl + +use lib '..'; +use Memoize qw(memoize unmemoize); + +print "1..5\n"; + +eval { unmemoize('f') }; # Should fail +print (($@ ? '' : 'not '), "ok 1\n"); + +{ my $I = 0; + sub u { $I++ } +} +memoize('u'); +my @ur = (&u, &u, &u); +print (("@ur" eq "0 0 0") ? "ok 2\n" : "not ok 2\n"); + +eval { unmemoize('u') }; # Should succeed +print ($@ ? "not ok 3\n" : "ok 3\n"); + +@ur = (&u, &u, &u); +print (("@ur" eq "1 2 3") ? "ok 4\n" : "not ok 4\n"); + +eval { unmemoize('u') }; # Should fail +print ($@ ? "ok 5\n" : "not ok 5\n"); + diff --git a/t/lib/1_compile.t b/t/lib/1_compile.t index a713c6c6e0..29db2ccb70 100644 --- a/t/lib/1_compile.t +++ b/t/lib/1_compile.t @@ -75,6 +75,10 @@ unless (using_feature('threads') && has_extension('Thread')) { delete_by_prefix('Thread::'); } +unless (has_extension('NDBM_File')) { + delete_by_name('Memoize::NDBM_File'); +} + delete_by_prefix('unicode::'); add_by_name('unicode::distinct'); # put this back |