diff options
author | Radu Greab <radu@netsoft.ro> | 2000-08-21 06:10:05 +0300 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-08-21 02:57:03 +0000 |
commit | 7a6a85bf4acedfcba42e5cccce98ec9a408bc69e (patch) | |
tree | 7635e7cdf2c917f07f4ca813e96bcf77858c3620 /ext/Storable | |
parent | b37ebb13f6aaa3a1b8a20b4a97e08bf9427d7e2f (diff) | |
download | perl-7a6a85bf4acedfcba42e5cccce98ec9a408bc69e.tar.gz |
Add Storable 0.7.2 from Raphael Manfredi,
plus the patch from
Subject: Re: someone with too much time and a 64-bit box and interest in Storable?
Message-ID: <Pine.LNX.4.10.10008210258160.1292-100000@busy.netsoft.ro>
plus changes to get Storable to compile with
picky ANSI compilers.
p4raw-id: //depot/perl@6734
Diffstat (limited to 'ext/Storable')
-rw-r--r-- | ext/Storable/ChangeLog | 366 | ||||
-rw-r--r-- | ext/Storable/MANIFEST | 7 | ||||
-rw-r--r-- | ext/Storable/Makefile.PL | 23 | ||||
-rw-r--r-- | ext/Storable/README | 81 | ||||
-rw-r--r-- | ext/Storable/Storable.pm | 627 | ||||
-rw-r--r-- | ext/Storable/Storable.xs | 4510 | ||||
-rw-r--r-- | ext/Storable/patchlevel.h | 1 |
7 files changed, 5615 insertions, 0 deletions
diff --git a/ext/Storable/ChangeLog b/ext/Storable/ChangeLog new file mode 100644 index 0000000000..3f130a9a24 --- /dev/null +++ b/ext/Storable/ChangeLog @@ -0,0 +1,366 @@ +Mon Aug 14 09:22:04 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Added a refcnt dec in retrieve_tied_key(): sv_magic() increases + the refcnt on the mg_ptr as well. + + Removed spurious dependency to Devel::Peek, which was used for + testing only in t/tied_items.t. Thanks to Conrad Heiney + <conrad@fringehead.org> for spotting it first. + +Sun Aug 13 22:12:59 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Marc Lehmann kindly contributed code to add overloading support + and to handle references to tied variables. + + Rewrote leading blurb about compatibility to make it clearer what + "backward compatibility" is about: when I say 0.7 is backward + compatible with 0.6, it means the revision 0.7 can read files + produced by 0.6. + + Mention new Clone(3) extension in SEE ALSO. + + Was wrongly optimizing for "undef" values in hashes by not + fully recursing: as a result, tied "undef" values were incorrectly + serialized. + +Sun Jul 30 12:59:17 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + + First revision of Storable 0.7. + + The serializing format is new, known as version 2.0. It is fully + backward compatible with 0.6. Earlier formats are deprecated and + have not even been tested: next version will drop pre-0.6 format. + + Changes since 0.6@11: + + - Moved interface to the "beta" status. Some tiny parts are still + subject to change, but nothing important enough to warrant an "alpha" + status any longer. + + - Slightly reduced the size of the Storable image by factorizing + object class names and removing final object storage notification due + to a redesign of the blessed object storing. + + - Classes can now redefine how they wish their instances to be serialized + and/or deep cloned. Serializing hooks are written in Perl code. + + - The engine is now fully re-entrant. + +Sun Apr 2 23:47:50 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Added provision to detect more recent binary formats, since + the new upcoming Storable-0.7 will use a different format. + In order to prevent attempting the de-serialization of newer + formats by older versions, I'm adding this now to the 0.6 series. + + I'm expecting this revision to be the last of the 0.6 series. + Unless it does not work with perl 5.6, which I don't use yet, + and therefore against which I cannot test. + +Wed Mar 29 19:55:21 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Added note about format incompatibilities with old versions + (i.e. pre 0.5@9 formats, which cannot be understood as there + was no versionning information in the file by then). + + Protect all $@ variables when eval {} used, to avoid corrupting + it when store/retrieve is called within an exception handler. + + Mistakenly included "patchlevel.h" instead of <patchlevel.h>, + preventing Perl's patchlevel from being included, which is + needed starting from 5.6. + +Tue May 12 09:15:15 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Fixed shared "undef" bug in hashes, which did not remain shared + through store/retrieve. + +Thu Feb 10 19:48:16 MET 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + added last_op_in_netorder() predicate + documented last_op_in_netorder() + added tests for the new last_op_in_netorder() predicate + +Wed Oct 20 19:07:36 MEST 1999 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Forgot to update VERSION + +Tue Oct 19 21:25:02 MEST 1999 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Added mention of japanese translation for the manual page. + + Fixed typo in macro that made threaded code not compilable, + especially on Win32 platforms. + + Changed detection of older perls (pre-5.005) by testing PATCHLEVEL + directly instead of relying on internal symbols. + +Tue Sep 14 22:13:28 MEST 1999 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Integrated "thread-safe" patch from Murray Nesbitt. + Note that this may not be very efficient for threaded code, + see comment in the code. + + Try to avoid compilation warning on 64-bit CPUs. Can't test it, + since I don't have access to such machines. + +Mon Jul 12 14:37:19 METDST 1999 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + changed my e-mail to pobox. + + mentionned it is not thread-safe. + + updated version number. + + uses new internal PL_* naming convention. + +Fri Jul 3 13:38:16 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated benchmark figures due to recent optimizations done in + store(): tagnums are now stored as-is in the hash table, so + no surrounding SV is created. And the "shared keys" mode for + hash table was turned off. + + Fixed backward compatibility (wrt 0.5@9) for retrieval of + blessed refs. That old version did something wrong, but the + bugfix prevented correct retrieval of the old format. + +Mon Jun 22 11:00:48 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Changed benchmark figures. + + Adjust refcnt of tied objects after calling sv_magic() to avoid + memory leaks. Contributed by Jeff Gresham. + +Fri Jun 12 11:50:04 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Added workaround for persistent LVALUE-ness in perl5.004. All + scalars tagged as being an lvalue are handled as if they were + not an lvalue at all. Added test for that LVALUE bug workaround. + + Now handles Perl immortal scalars explicitely, by storing &sv_yes + as such, explicitely. + + Retrieval of non-immortal undef cannot be shared. Previous + version was over-optimizing by not creating a separate SV for + all undefined scalars seen. + +Thu Jun 4 17:21:51 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Baseline for Storable-0.6@0. + + This version introduces a binary incompatibility in the generated + binary image, which is more compact than older ones by approximatively + 15%, depending on the exact degree of sharing in your structures. + + The good news is that your older images can still be retrieved with + this version, i.e. backward compatibility is preserved. This version + of Storable can only generate new binaries however. + + Another good news is that the retrieval of data structure is + significantly quicker than before, because a Perl array is used + instead of a hash table to keep track of retrieved objects, and + also because the image being smaller, less I/O function calls are + made. + +Tue May 12 09:15:15 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Version number now got from Storable.pm directly. + + Fixed overzealous sv_type() optimization, which would make + Storable fail when faced with an "upgraded" SV to the PVIV + or PVNV kind containing a reference. + +Thu Apr 30 15:11:30 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Extended the SYNOPSIS section to give quick overview of the + routines and their signature. + + Optimized sv_type() to avoid flags checking when not needed, i.e. + when their type makes it impossible for them to be refs or tied. + This slightly increases throughput by a few percents when refs + and tied variables are marginal occurrences in your data. + + Stubs for XS now use OutputStream and InputStream file types to + make it work when the given file is actually a socket. Perl + makes a distinction for sockets in its internal I/O structures + by having both a read and a write structure, whereas plain files + share the same one. + +Tue Jun 3 09:41:33 METDST 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Thanks to a contribution from Benjamin A. Holzman, Storable is now + able to correctly serialize tied SVs, i.e. tied arrays, hashes + and scalars. + +Thu Apr 9 18:07:51 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + I said SvPOK() had changed to SvPOKp(), but that was a lie... + +Wed Apr 8 13:14:29 METDST 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Wrote sizeof(SV *) instead of sizeof(I32) when portable, which + in effect mangled the object tags and prevented portability + accross 32/64 bit architectures! + +Wed Mar 25 14:57:02 MET 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Added code example for store_fd() and retrieve_fd() in the + man page, to emphasize that file descriptors must be passed as + globs, not as plain strings. + + Cannot use SV addresses as tag when using nstore() on LP64. This + was the cause of problems when creating a storable image on an + LP64 machine and retrieving it on an ILP32 system, which is + exactly what nstore() is meant for... + + However, we continue to use SV addresses as tags for plain store(), + because benchamarking shows that it saves up to 8% of the store + time, and store() is meant to be fast at the expense of lack + of portability. + + This means there will be approximately an 8% degradation of + performance for nstore(), but it's now working as expected. + That cost may vary on your machine of course, since it is + solely caused by the memory allocation overhead used to create + unique SV tags for each distinct stored SV. + +Tue Jan 20 09:21:53 MET 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Don't use any '_' in version number. + +Tue Jan 13 17:51:50 MET 1998 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated version number. + + added binmode() calls for systems where it matters. + + Be sure to pass globs, not plain file strings, to C routines, + so that Storable can be used under the Perl debugger. + +Wed Nov 5 10:53:22 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Fix memory leaks on seen hash table and returned SV refs. + + Storable did not work properly when tainting enabled. + + Fixed "Allocation too large" messages in freeze/thaw and added. + proper regression test in t/freeze.t. + +Tue Jun 3 09:41:33 METDST 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated version number + + Added freeze/thaw interface and dclone. + +Fri May 16 10:45:47 METDST 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Forgot that AutoLoader does not export its own AUTOLOAD. + I could use + + use AutoLoader 'AUTOLOAD'; + + but that would not be backward compatible. So the export is + done by hand... + +Tue Mar 25 11:21:32 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Empty scalar strings are now "defined" at retrieval time. + + New test to ensure an empty string is defined when retrieved. + +Thu Feb 27 16:32:44 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated version number + + Declare VERSION as being used + + Fixed a typo in the PerlIO_putc remapping. + PerlIO_read and perlIO_write inverted size/nb_items. + (only relevant for pre-perl5.004 versions) + +Thu Feb 27 15:58:31 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated version number + + Added VERSION identification + + Allow build with perl5.003, which is ante perlIO time + +Mon Jan 13 17:53:18 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Random code fixes. + +Wed Jan 22 15:19:56 MET 1997 Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> + +. Description: + + Updated version number in Makefile.PL. + + Added "thanks to" section to README. + + Documented new forgive_me variable. + + Made 64-bit clean. + + Added forgive_me support to allow store() of data structures + containing non-storable items like CODE refs. + diff --git a/ext/Storable/MANIFEST b/ext/Storable/MANIFEST new file mode 100644 index 0000000000..8833380179 --- /dev/null +++ b/ext/Storable/MANIFEST @@ -0,0 +1,7 @@ +README Read this first +MANIFEST This shipping list +Makefile.PL Generic Makefile template +Storable.pm The perl side of Storable +Storable.xs The C side of Storable +patchlevel.h Records current patchlevel +ChangeLog Changes since baseline diff --git a/ext/Storable/Makefile.PL b/ext/Storable/Makefile.PL new file mode 100644 index 0000000000..3b5aa2ced5 --- /dev/null +++ b/ext/Storable/Makefile.PL @@ -0,0 +1,23 @@ +# $Id: Makefile.PL,v 0.7 2000/08/03 22:04:44 ram Exp $ +# +# Copyright (c) 1995-2000, Raphael Manfredi +# +# You may redistribute only under the terms of the Artistic License, +# as specified in the README file that comes with the distribution. +# +# $Log: Makefile.PL,v $ +# Revision 0.7 2000/08/03 22:04:44 ram +# Baseline for second beta release. +# + +use ExtUtils::MakeMaker; +use Config; + +WriteMakefile( + 'NAME' => 'Storable', + 'DISTNAME' => "Storable", + 'VERSION_FROM' => 'Storable.pm', + 'dist' => { SUFFIX => 'gz', COMPRESS => 'gzip -f' }, + 'clean' => {'FILES' => '*%'}, +); + diff --git a/ext/Storable/README b/ext/Storable/README new file mode 100644 index 0000000000..4c574a0f68 --- /dev/null +++ b/ext/Storable/README @@ -0,0 +1,81 @@ + Storable 0.7 + Copyright (c) 1995-2000, Raphael Manfredi + +------------------------------------------------------------------------ + This program is free software; you can redistribute it and/or modify + it under the terms of the Artistic License, a copy of which can be + found with perl. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + Artistic License for more details. +------------------------------------------------------------------------ + + *** This is beta software -- use at your own risks *** + ++======================================================================= +| PLEASE NOTE CAREFULLY +| +| The serialization format changed between 0.5 and 0.6, and the module +| is NOT backward compatible. Think about it when upgrading from a +| pre-0.5@9 version -- images from versions 0.5@9 could still be read +| by 0.6, but have not been tested with 0.7. +| +| The next release (0.8 or 1.0) will DROP support for pre-0.6 format. +| +| The serialization format changed between 0.6 and 0.7, and the module +| is fully backward compatible, meaning 0.7 can read binary images from +| 0.6, although it only generates new ones. If you encounter a situation +| where it is not AND can duplicate it via a small test case, please +| send it to me, along with a patch to fix the problem if you can. ++======================================================================= + +The Storable extension brings persistency to your data. + +You may recursively store to disk any data structure, no matter how +complex and circular it is, provided it contains only SCALAR, ARRAY, +HASH (possibly tied) and references (possibly blessed) to those items. + +At a later stage, or in another program, you may retrieve data from +the stored file and recreate the same hiearchy in memory. If you +had blessed references, the retrieved references are blessed into +the same package, so you must make sure you have access to the +same perl class than the one used to create the relevant objects. + +There is also a dclone() routine which performs an optimized mirroring +of any data structure, preserving its topology. + +Objects (blessed references) may also redefine the way storage and +retrieval is performed, and/or what deep cloning should do on those +objects. + +To compile this extension, run: + + perl Makefile.PL [PERL_SRC=...where you put perl sources...] + make + make install + +There is an embeded POD manual page in Storable.pm. + +Raphael Manfredi <Raphael_Manfredi@pobox.com> + +------------------------------------------------------------------------ +Thanks to: + + Jarkko Hietaniemi <jhi@iki.fi> + Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de> + Benjamin A. Holzman <bah@ecnvantage.com> + Andrew Ford <A.Ford@ford-mason.co.uk> + Gisle Aas <gisle@aas.no> + Jeff Gresham <gresham_jeffrey@jpmorgan.com> + Murray Nesbitt <murray@activestate.com> + Albert N. Micheev <Albert.N.Micheev@f80.n5049.z2.fidonet.org> + Marc Lehmann <pcg@opengroup.org> + +for their contributions. + +There is a Japanese translation of this man page available at +http://member.nifty.ne.jp/hippo2000/perltips/storable.htm, +courtesy of Kawai, Takanori <kawai@nippon-rad.co.jp>. +------------------------------------------------------------------------ diff --git a/ext/Storable/Storable.pm b/ext/Storable/Storable.pm new file mode 100644 index 0000000000..15d194ceb0 --- /dev/null +++ b/ext/Storable/Storable.pm @@ -0,0 +1,627 @@ +;# $Id: Storable.pm,v 0.7.1.2 2000/08/14 07:18:40 ram Exp $ +;# +;# Copyright (c) 1995-2000, Raphael Manfredi +;# +;# You may redistribute only under the terms of the Artistic License, +;# as specified in the README file that comes with the distribution. +;# +;# $Log: Storable.pm,v $ +;# Revision 0.7.1.2 2000/08/14 07:18:40 ram +;# patch2: increased version number +;# +;# Revision 0.7.1.1 2000/08/13 20:08:58 ram +;# patch1: mention new Clone(3) extension in SEE ALSO +;# patch1: contributor Marc Lehmann added overloading and ref to tied items +;# patch1: updated e-mail from Benjamin Holzman +;# +;# Revision 0.7 2000/08/03 22:04:44 ram +;# Baseline for second beta release. +;# + +require DynaLoader; +require Exporter; +package Storable; @ISA = qw(Exporter DynaLoader); + +@EXPORT = qw(store retrieve); +@EXPORT_OK = qw( + nstore store_fd nstore_fd retrieve_fd + freeze nfreeze thaw + dclone +); + +use AutoLoader; +use vars qw($forgive_me $VERSION); + +$VERSION = '0.702'; +*AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr... + +# +# Use of Log::Agent is optional +# + +eval "use Log::Agent"; + +unless (defined @Log::Agent::EXPORT) { + eval q{ + sub logcroak { + require Carp; + Carp::croak(@_); + } + }; +} + +sub logcroak; + +bootstrap Storable; +1; +__END__ + +# +# store +# +# Store target object hierarchy, identified by a reference to its root. +# The stored object tree may later be retrieved to memory via retrieve. +# Returns undef if an I/O error occurred, in which case the file is +# removed. +# +sub store { + return _store(\&pstore, @_); +} + +# +# nstore +# +# Same as store, but in network order. +# +sub nstore { + return _store(\&net_pstore, @_); +} + +# Internal store to file routine +sub _store { + my $xsptr = shift; + my $self = shift; + my ($file) = @_; + logcroak "not a reference" unless ref($self); + logcroak "too many arguments" unless @_ == 1; # No @foo in arglist + local *FILE; + open(FILE, ">$file") || logcroak "can't create $file: $!"; + binmode FILE; # Archaic systems... + my $da = $@; # Don't mess if called from exception handler + my $ret; + # Call C routine nstore or pstore, depending on network order + eval { $ret = &$xsptr(*FILE, $self) }; + close(FILE) or $ret = undef; + unlink($file) or warn "Can't unlink $file: $!\n" if $@ || !defined $ret; + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $ret ? $ret : undef; +} + +# +# store_fd +# +# Same as store, but perform on an already opened file descriptor instead. +# Returns undef if an I/O error occurred. +# +sub store_fd { + return _store_fd(\&pstore, @_); +} + +# +# nstore_fd +# +# Same as store_fd, but in network order. +# +sub nstore_fd { + my ($self, $file) = @_; + return _store_fd(\&net_pstore, @_); +} + +# Internal store routine on opened file descriptor +sub _store_fd { + my $xsptr = shift; + my $self = shift; + my ($file) = @_; + logcroak "not a reference" unless ref($self); + logcroak "too many arguments" unless @_ == 1; # No @foo in arglist + my $fd = fileno($file); + logcroak "not a valid file descriptor" unless defined $fd; + my $da = $@; # Don't mess if called from exception handler + my $ret; + # Call C routine nstore or pstore, depending on network order + eval { $ret = &$xsptr($file, $self) }; + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $ret ? $ret : undef; +} + +# +# freeze +# +# Store oject and its hierarchy in memory and return a scalar +# containing the result. +# +sub freeze { + _freeze(\&mstore, @_); +} + +# +# nfreeze +# +# Same as freeze but in network order. +# +sub nfreeze { + _freeze(\&net_mstore, @_); +} + +# Internal freeze routine +sub _freeze { + my $xsptr = shift; + my $self = shift; + logcroak "not a reference" unless ref($self); + logcroak "too many arguments" unless @_ == 0; # No @foo in arglist + my $da = $@; # Don't mess if called from exception handler + my $ret; + # Call C routine mstore or net_mstore, depending on network order + eval { $ret = &$xsptr($self) }; + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $ret ? $ret : undef; +} + +# +# retrieve +# +# Retrieve object hierarchy from disk, returning a reference to the root +# object of that tree. +# +sub retrieve { + my ($file) = @_; + local *FILE; + open(FILE, "$file") || logcroak "can't open $file: $!"; + binmode FILE; # Archaic systems... + my $self; + my $da = $@; # Could be from exception handler + eval { $self = pretrieve(*FILE) }; # Call C routine + close(FILE); + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $self; +} + +# +# retrieve_fd +# +# Same as retrieve, but perform from an already opened file descriptor instead. +# +sub retrieve_fd { + my ($file) = @_; + my $fd = fileno($file); + logcroak "not a valid file descriptor" unless defined $fd; + my $self; + my $da = $@; # Could be from exception handler + eval { $self = pretrieve($file) }; # Call C routine + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $self; +} + +# +# thaw +# +# Recreate objects in memory from an existing frozen image created +# by freeze. If the frozen image passed is undef, return undef. +# +sub thaw { + my ($frozen) = @_; + return undef unless defined $frozen; + my $self; + my $da = $@; # Could be from exception handler + eval { $self = mretrieve($frozen) }; # Call C routine + logcroak $@ if $@ =~ s/\.?\n$/,/; + $@ = $da; + return $self; +} + +=head1 NAME + +Storable - persistency for perl data structures + +=head1 SYNOPSIS + + use Storable; + store \%table, 'file'; + $hashref = retrieve('file'); + + use Storable qw(nstore store_fd nstore_fd freeze thaw dclone); + + # Network order + nstore \%table, 'file'; + $hashref = retrieve('file'); # There is NO nretrieve() + + # Storing to and retrieving from an already opened file + store_fd \@array, \*STDOUT; + nstore_fd \%table, \*STDOUT; + $aryref = retrieve_fd(\*SOCKET); + $hashref = retrieve_fd(\*SOCKET); + + # Serializing to memory + $serialized = freeze \%table; + %table_clone = %{ thaw($serialized) }; + + # Deep (recursive) cloning + $cloneref = dclone($ref); + +=head1 DESCRIPTION + +The Storable package brings persistency to your perl data structures +containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be +convenientely stored to disk and retrieved at a later time. + +It can be used in the regular procedural way by calling C<store> with +a reference to the object to be stored, along with the file name where +the image should be written. +The routine returns C<undef> for I/O problems or other internal error, +a true value otherwise. Serious errors are propagated as a C<die> exception. + +To retrieve data stored to disk, use C<retrieve> with a file name, +and the objects stored into that file are recreated into memory for you, +a I<reference> to the root object being returned. In case an I/O error +occurs while reading, C<undef> is returned instead. Other serious +errors are propagated via C<die>. + +Since storage is performed recursively, you might want to stuff references +to objects that share a lot of common data into a single array or hash +table, and then store that object. That way, when you retrieve back the +whole thing, the objects will continue to share what they originally shared. + +At the cost of a slight header overhead, you may store to an already +opened file descriptor using the C<store_fd> routine, and retrieve +from a file via C<retrieve_fd>. Those names aren't imported by default, +so you will have to do that explicitely if you need those routines. +The file descriptor you supply must be already opened, for read +if you're going to retrieve and for write if you wish to store. + + store_fd(\%table, *STDOUT) || die "can't store to stdout\n"; + $hashref = retrieve_fd(*STDIN); + +You can also store data in network order to allow easy sharing across +multiple platforms, or when storing on a socket known to be remotely +connected. The routines to call have an initial C<n> prefix for I<network>, +as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be +correctly restored so you don't have to know whether you're restoring +from native or network ordered data. + +When using C<retrieve_fd>, objects are retrieved in sequence, one +object (i.e. one recursive tree) per associated C<store_fd>. + +If you're more from the object-oriented camp, you can inherit from +Storable and directly store your objects by invoking C<store> as +a method. The fact that the root of the to-be-stored tree is a +blessed reference (i.e. an object) is special-cased so that the +retrieve does not provide a reference to that object but rather the +blessed object reference itself. (Otherwise, you'd get a reference +to that blessed object). + +=head1 MEMORY STORE + +The Storable engine can also store data into a Perl scalar instead, to +later retrieve them. This is mainly used to freeze a complex structure in +some safe compact memory place (where it can possibly be sent to another +process via some IPC, since freezing the structure also serializes it in +effect). Later on, and maybe somewhere else, you can thaw the Perl scalar +out and recreate the original complex structure in memory. + +Surprisingly, the routines to be called are named C<freeze> and C<thaw>. +If you wish to send out the frozen scalar to another machine, use +C<nfreeze> instead to get a portable image. + +Note that freezing an object structure and immediately thawing it +actually achieves a deep cloning of that structure: + + dclone(.) = thaw(freeze(.)) + +Storable provides you with a C<dclone> interface which does not create +that intermediary scalar but instead freezes the structure in some +internal memory space and then immediatly thaws it out. + +=head1 SPEED + +The heart of Storable is written in C for decent speed. Extra low-level +optimization have been made when manipulating perl internals, to +sacrifice encapsulation for the benefit of a greater speed. + +=head1 CANONICAL REPRESENTATION + +Normally Storable stores elements of hashes in the order they are +stored internally by Perl, i.e. pseudo-randomly. If you set +C<$Storable::canonical> to some C<TRUE> value, Storable will store +hashes with the elements sorted by their key. This allows you to +compare data structures by comparing their frozen representations (or +even the compressed frozen representations), which can be useful for +creating lookup tables for complicated queries. + +Canonical order does not imply network order, those are two orthogonal +settings. + +=head1 ERROR REPORTING + +Storable uses the "exception" paradigm, in that it does not try to workaround +failures: if something bad happens, an exception is generated from the +caller's perspective (see L<Carp> and C<croak()>). Use eval {} to trap +those exceptions. + +When Storable croaks, it tries to report the error via the C<logcroak()> +routine from the C<Log::Agent> package, if it is available. + +=head1 WIZARDS ONLY + +=head2 Hooks + +Any class may define hooks that will be called during the serialization +and deserialization process on objects that are instances of that class. +Those hooks can redefine the way serialization is performed (and therefore, +how the symetrical deserialization should be conducted). + +Since we said earlier: + + dclone(.) = thaw(freeze(.)) + +everything we say about hooks should also hold for deep cloning. However, +hooks get to know whether the operation is a mere serialization, or a cloning. + +Therefore, when serializing hooks are involved, + + dclone(.) <> thaw(freeze(.)) + +Well, you could keep them in sync, but there's no guarantee it will always +hold on classes somebody else wrote. Besides, there is little to gain in +doing so: a serializing hook could only keep one attribute of an object, +which is probably not what should happen during a deep cloning of that +same object. + +Here is the hooking interface: + +=over + +=item C<STORABLE_freeze> I<obj>, I<cloning> + +The serializing hook, called on the object during serialization. It can be +inherited, or defined in the class itself, like any other method. + +Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating +whether we're in a dclone() or a regular serialization via store() or freeze(). + +Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized +is the serialized form to be used, and the optional $ref1, $ref2, etc... are +extra references that you wish to let the Storable engine serialize. + +At deserialization time, you will be given back the same LIST, but all the +extra references will be pointing into the deserialized structure. + +The B<first time> the hook is hit in a serialization flow, you may have it +return an empty list. That will signal the Storable engine to further +discard that hook for this class and to therefore revert to the default +serialization of the underlying Perl data. The hook will again be normally +processed in the next serialization. + +Unless you know better, serializing hook should always say: + + sub STORABLE_freeze { + my ($self, $cloning) = @_; + return if $cloning; # Regular default serialization + .... + } + +in order to keep reasonable dclone() semantics. + +=item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ... + +The deserializing hook called on the object during deserialization. +But wait. If we're deserializing, there's no object yet... right? + +Wrong: the Storable engine creates an empty one for you. If you know Eiffel, +you can view C<STORABLE_thaw> as an alternate creation routine. + +This means the hook can be inherited like any other method, and that +I<obj> is your blessed reference for this particular instance. + +The other arguments should look familiar if you know C<STORABLE_freeze>: +I<cloning> is true when we're part of a deep clone operation, I<serialized> +is the serialized string you returned to the engine in C<STORABLE_freeze>, +and there may be an optional list of references, in the same order you gave +them at serialization time, pointing to the deserialized objects (which +have been processed courtesy of the Storable engine). + +It is up to you to use these information to populate I<obj> the way you want. + +Returned value: none. + +=back + +=head2 Predicates + +Predicates are not exportable. They must be called by explicitely prefixing +them with the Storable package name. + +=over + +=item C<Storable::last_op_in_netorder> + +The C<Storable::last_op_in_netorder()> predicate will tell you whether +network order was used in the last store or retrieve operation. If you +don't know how to use this, just forget about it. + +=item C<Storable::is_storing> + +Returns true if within a store operation (via STORABLE_freeze hook). + +=item C<Storable::is_retrieving> + +Returns true if within a retrieve operation, (via STORABLE_thaw hook). + +=back + +=head2 Recursion + +With hooks comes the ability to recurse back to the Storable engine. Indeed, +hooks are regular Perl code, and Storable is convenient when it comes to +serialize and deserialize things, so why not use it to handle the +serialization string? + +There are a few things you need to know however: + +=over + +=item * + +You can create endless loops if the things you serialize via freeze() +(for instance) point back to the object we're trying to serialize in the hook. + +=item * + +Shared references among objects will not stay shared: if we're serializing +the list of object [A, C] where both object A and C refer to the SAME object +B, and if there is a serializing hook in A that says freeze(B), then when +deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D, +a deep clone of B'. The topology was not preserved. + +=back + +That's why C<STORABLE_freeze> lets you provide a list of references +to serialize. The engine guarantees that those will be serialized in the +same context as the other objects, and therefore that shared objects will +stay shared. + +In the above [A, C] example, the C<STORABLE_freeze> hook could return: + + ("something", $self->{B}) + +and the B part would be serialized by the engine. In C<STORABLE_thaw>, you +would get back the reference to the B' object, deserialized for you. + +Therefore, recursion should normally be avoided, but is nonetheless supported. + +=head2 Deep Cloning + +There is a new Clone module available on CPAN which implements deep cloning +natively, i.e. without freezing to memory and thawing the result. It is +aimed to replace Storable's dclone() some day. However, it does not currently +support Storable hooks to redefine the way deep cloning is performed. + +=head1 EXAMPLES + +Here are some code samples showing a possible usage of Storable: + + use Storable qw(store retrieve freeze thaw dclone); + + %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1); + + store(\%color, '/tmp/colors') or die "Can't store %a in /tmp/colors!\n"; + + $colref = retrieve('/tmp/colors'); + die "Unable to retrieve from /tmp/colors!\n" unless defined $colref; + printf "Blue is still %lf\n", $colref->{'Blue'}; + + $colref2 = dclone(\%color); + + $str = freeze(\%color); + printf "Serialization of %%color is %d bytes long.\n", length($str); + $colref3 = thaw($str); + +which prints (on my machine): + + Blue is still 0.100000 + Serialization of %color is 102 bytes long. + +=head1 WARNING + +If you're using references as keys within your hash tables, you're bound +to disapointment when retrieving your data. Indeed, Perl stringifies +references used as hash table keys. If you later wish to access the +items via another reference stringification (i.e. using the same +reference that was used for the key originally to record the value into +the hash table), it will work because both references stringify to the +same string. + +It won't work across a C<store> and C<retrieve> operations however, because +the addresses in the retrieved objects, which are part of the stringified +references, will probably differ from the original addresses. The +topology of your structure is preserved, but not hidden semantics +like those. + +On platforms where it matters, be sure to call C<binmode()> on the +descriptors that you pass to Storable functions. + +Storing data canonically that contains large hashes can be +significantly slower than storing the same data normally, as +temprorary arrays to hold the keys for each hash have to be allocated, +populated, sorted and freed. Some tests have shown a halving of the +speed of storing -- the exact penalty will depend on the complexity of +your data. There is no slowdown on retrieval. + +=head1 BUGS + +You can't store GLOB, CODE, FORMLINE, etc... If you can define +semantics for those operations, feel free to enhance Storable so that +it can deal with them. + +The store functions will C<croak> if they run into such references +unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that +case, the fatal message is turned in a warning and some +meaningless string is stored instead. + +Setting C<$Storable::canonical> may not yield frozen strings that +compare equal due to possible stringification of numbers. When the +string version of a scalar exists, it is the form stored, therefore +if you happen to use your numbers as strings between two freezing +operations on the same data structures, you will get different +results. + +Due to the aforementionned optimizations, Storable is at the mercy +of perl's internal redesign or structure changes. If that bothers +you, you can try convincing Larry that what is used in Storable +should be documented and consistently kept in future revisions. + +=head1 CREDITS + +Thank you to (in chronological order): + + Jarkko Hietaniemi <jhi@iki.fi> + Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de> + Benjamin A. Holzman <bah@ecnvantage.com> + Andrew Ford <A.Ford@ford-mason.co.uk> + Gisle Aas <gisle@aas.no> + Jeff Gresham <gresham_jeffrey@jpmorgan.com> + Murray Nesbitt <murray@activestate.com> + Marc Lehmann <pcg@opengroup.org> + +for their bug reports, suggestions and contributions. + +Benjamin Holzman contributed the tied variable support, Andrew Ford +contributed the canonical order for hashes, and Gisle Aas fixed +a few misunderstandings of mine regarding the Perl internals, +and optimized the emission of "tags" in the output streams by +simply counting the objects instead of tagging them (leading to +a binary incompatibility for the Storable image starting at version +0.6--older images are of course still properly understood). +Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading +and reference to tied items support. + +=head1 TRANSLATIONS + +There is a Japanese translation of this man page available at +http://member.nifty.ne.jp/hippo2000/perltips/storable.htm , +courtesy of Kawai, Takanori <kawai@nippon-rad.co.jp>. + +=head1 AUTHOR + +Raphael Manfredi F<E<lt>Raphael_Manfredi@pobox.comE<gt>> + +=head1 SEE ALSO + +Clone(3). + +=cut + diff --git a/ext/Storable/Storable.xs b/ext/Storable/Storable.xs new file mode 100644 index 0000000000..3de5891d1c --- /dev/null +++ b/ext/Storable/Storable.xs @@ -0,0 +1,4510 @@ +/* + * Store and retrieve mechanism. + */ + +/* + * $Id: Storable.xs,v 0.7.1.2 2000/08/14 07:19:27 ram Exp $ + * + * Copyright (c) 1995-2000, Raphael Manfredi + * + * You may redistribute only under the terms of the Artistic License, + * as specified in the README file that comes with the distribution. + * + * $Log: Storable.xs,v $ + * Revision 0.7.1.2 2000/08/14 07:19:27 ram + * patch2: added a refcnt dec in retrieve_tied_key() + * + * Revision 0.7.1.1 2000/08/13 20:10:06 ram + * patch1: was wrongly optimizing for "undef" values in hashes + * patch1: added support for ref to tied items in hash/array + * patch1: added overloading support + * + * Revision 0.7 2000/08/03 22:04:44 ram + * Baseline for second beta release. + * + */ + +#include <EXTERN.h> +#include <perl.h> +#include <patchlevel.h> /* Perl's one, needed since 5.6 */ +#include <XSUB.h> + +/*#define DEBUGME /* Debug mode, turns assertions on as well */ +/*#define DASSERT /* Assertion mode */ + +/* + * Pre PerlIO time when none of USE_PERLIO and PERLIO_IS_STDIO is defined + * Provide them with the necessary defines so they can build with pre-5.004. + */ +#ifndef USE_PERLIO +#ifndef PERLIO_IS_STDIO +#define PerlIO FILE +#define PerlIO_getc(x) getc(x) +#define PerlIO_putc(f,x) putc(x,f) +#define PerlIO_read(x,y,z) fread(y,1,z,x) +#define PerlIO_write(x,y,z) fwrite(y,1,z,x) +#define PerlIO_stdoutf printf +#endif /* PERLIO_IS_STDIO */ +#endif /* USE_PERLIO */ + +/* + * Earlier versions of perl might be used, we can't assume they have the latest! + */ +#ifndef newRV_noinc +#define newRV_noinc(sv) ((Sv = newRV(sv)), --SvREFCNT(SvRV(Sv)), Sv) +#endif +#if (PATCHLEVEL <= 4) /* Older perls (<= 5.004) lack PL_ namespace */ +#define PL_sv_yes sv_yes +#define PL_sv_no sv_no +#define PL_sv_undef sv_undef +#endif +#ifndef HvSHAREKEYS_off +#define HvSHAREKEYS_off(hv) /* Ignore */ +#endif + +#ifdef DEBUGME +#ifndef DASSERT +#define DASSERT +#endif +#define TRACEME(x) do { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); } while (0) +#else +#define TRACEME(x) +#endif + +#ifdef DASSERT +#define ASSERT(x,y) do { \ + if (!(x)) { \ + PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ", \ + __FILE__, __LINE__); \ + PerlIO_stdoutf y; PerlIO_stdoutf("\n"); \ + } \ +} while (0) +#else +#define ASSERT(x,y) +#endif + +/* + * Type markers. + */ + +#define C(x) ((char) (x)) /* For markers with dynamic retrieval handling */ + +#define SX_OBJECT C(0) /* Already stored object */ +#define SX_LSCALAR C(1) /* Scalar (string) forthcoming (length, data) */ +#define SX_ARRAY C(2) /* Array forthcominng (size, item list) */ +#define SX_HASH C(3) /* Hash forthcoming (size, key/value pair list) */ +#define SX_REF C(4) /* Reference to object forthcoming */ +#define SX_UNDEF C(5) /* Undefined scalar */ +#define SX_INTEGER C(6) /* Integer forthcoming */ +#define SX_DOUBLE C(7) /* Double forthcoming */ +#define SX_BYTE C(8) /* (signed) byte forthcoming */ +#define SX_NETINT C(9) /* Integer in network order forthcoming */ +#define SX_SCALAR C(10) /* Scalar (small) forthcoming (length, data) */ +#define SX_TIED_ARRAY C(11) /* Tied array forthcoming */ +#define SX_TIED_HASH C(12) /* Tied hash forthcoming */ +#define SX_TIED_SCALAR C(13) /* Tied scalar forthcoming */ +#define SX_SV_UNDEF C(14) /* Perl's immortal PL_sv_undef */ +#define SX_SV_YES C(15) /* Perl's immortal PL_sv_yes */ +#define SX_SV_NO C(16) /* Perl's immortal PL_sv_no */ +#define SX_BLESS C(17) /* Object is blessed */ +#define SX_IX_BLESS C(18) /* Object is blessed, classname given by index */ +#define SX_HOOK C(19) /* Stored via hook, user-defined */ +#define SX_OVERLOAD C(20) /* Overloaded reference */ +#define SX_TIED_KEY C(21) /* Tied magic key forthcoming */ +#define SX_TIED_IDX C(22) /* Tied magic index forthcoming */ +#define SX_ERROR C(23) /* Error */ + +/* + * Those are only used to retrieve "old" pre-0.6 binary images. + */ +#define SX_ITEM 'i' /* An array item introducer */ +#define SX_IT_UNDEF 'I' /* Undefined array item */ +#define SX_KEY 'k' /* An hash key introducer */ +#define SX_VALUE 'v' /* An hash value introducer */ +#define SX_VL_UNDEF 'V' /* Undefined hash value */ + +/* + * Those are only used to retrieve "old" pre-0.7 binary images + */ + +#define SX_CLASS 'b' /* Object is blessed, class name length <255 */ +#define SX_LG_CLASS 'B' /* Object is blessed, class name length >255 */ +#define SX_STORED 'X' /* End of object */ + +/* + * Limits between short/long length representation. + */ + +#define LG_SCALAR 255 /* Large scalar length limit */ +#define LG_BLESS 127 /* Large classname bless limit */ + +/* + * Operation types + */ + +#define ST_STORE 0x1 /* Store operation */ +#define ST_RETRIEVE 0x2 /* Retrieval operation */ +#define ST_CLONE 0x4 /* Deep cloning operation */ + +/* + * The following structure is used for hash table key retrieval. Since, when + * retrieving objects, we'll be facing blessed hash references, it's best + * to pre-allocate that buffer once and resize it as the need arises, never + * freeing it (keys will be saved away someplace else anyway, so even large + * keys are not enough a motivation to reclaim that space). + * + * This structure is also used for memory store/retrieve operations which + * happen in a fixed place before being malloc'ed elsewhere if persistency + * is required. Hence the aptr pointer. + */ +struct extendable { + char *arena; /* Will hold hash key strings, resized as needed */ + STRLEN asiz; /* Size of aforementionned buffer */ + char *aptr; /* Arena pointer, for in-place read/write ops */ + char *aend; /* First invalid address */ +}; + +/* + * At store time: + * An hash table records the objects which have already been stored. + * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e. + * an arbitrary sequence number) is used to identify them. + * + * At retrieve time: + * An array table records the objects which have already been retrieved, + * as seen by the tag determind by counting the objects themselves. The + * reference to that retrieved object is kept in the table, and is returned + * when an SX_OBJECT is found bearing that same tag. + * + * The same processing is used to record "classname" for blessed objects: + * indexing by a hash at store time, and via an array at retrieve time. + */ + +typedef unsigned long stag_t; /* Used by pre-0.6 binary format */ + +/* + * The following "thread-safe" related defines were contributed by + * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who + * only renamed things a little bit to ensure consistency with surrounding + * code. -- RAM, 14/09/1999 + * + * The original patch suffered from the fact that the stcxt_t structure + * was global. Murray tried to minimize the impact on the code as much as + * possible. + * + * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks + * on objects. Therefore, the notion of context needs to be generalized, + * threading or not. + */ + +#define MY_VERSION "Storable(" XS_VERSION ")" + +typedef struct stcxt { + int entry; /* flags recursion */ + int optype; /* type of traversal operation */ + HV *hseen; /* which objects have been seen, store time */ + AV *aseen; /* which objects have been seen, retrieve time */ + HV *hclass; /* which classnames have been seen, store time */ + AV *aclass; /* which classnames have been seen, retrieve time */ + HV *hook; /* cache for hook methods per class name */ + I32 tagnum; /* incremented at store time for each seen object */ + I32 classnum; /* incremented at store time for each seen classname */ + int netorder; /* true if network order used */ + int forgive_me; /* whether to be forgiving... */ + int canonical; /* whether to store hashes sorted by key */ + int dirty; /* context is dirty due to CROAK() -- can be cleaned */ + struct extendable keybuf; /* for hash key retrieval */ + struct extendable membuf; /* for memory store/retrieve operations */ + PerlIO *fio; /* where I/O are performed, NULL for memory */ + int ver_major; /* major of version for retrieved object */ + int ver_minor; /* minor of version for retrieved object */ + SV *(**retrieve_vtbl)(); /* retrieve dispatch table */ + struct stcxt *prev; /* contexts chained backwards in real recursion */ +} stcxt_t; + +#if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI) + +#if (PATCHLEVEL <= 4) && (SUBVERSION < 68) +#define dSTCXT_SV \ + SV *perinterp_sv = perl_get_sv(MY_VERSION, FALSE) +#else /* >= perl5.004_68 */ +#define dSTCXT_SV \ + SV *perinterp_sv = *hv_fetch(PL_modglobal, \ + MY_VERSION, sizeof(MY_VERSION)-1, TRUE) +#endif /* < perl5.004_68 */ + +#define dSTCXT_PTR(T,name) \ + T name = (T)(perinterp_sv && SvIOK(perinterp_sv)\ + ? SvIVX(perinterp_sv) : NULL) +#define dSTCXT \ + dSTCXT_SV; \ + dSTCXT_PTR(stcxt_t *, cxt) + +#define INIT_STCXT \ + dSTCXT; \ + Newz(0, cxt, 1, stcxt_t); \ + sv_setiv(perinterp_sv, (IV) cxt) + +#define SET_STCXT(x) do { \ + dSTCXT_SV; \ + sv_setiv(perinterp_sv, (IV) (x)); \ +} while (0) + +#else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */ + +static stcxt_t Context; +static stcxt_t *Context_ptr = &Context; +#define dSTCXT stcxt_t *cxt = Context_ptr +#define INIT_STCXT dSTCXT +#define SET_STCXT(x) Context_ptr = x + +#endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */ + +/* + * KNOWN BUG: + * Croaking implies a memory leak, since we don't use setjmp/longjmp + * to catch the exit and free memory used during store or retrieve + * operations. This is not too difficult to fix, but I need to understand + * how Perl does it, and croaking is exceptional anyway, so I lack the + * motivation to do it. + * + * The current workaround is to mark the context as dirty when croaking, + * so that data structures can be freed whenever we renter Storable code + * (but only *then*: it's a workaround, not a fix). + * + * This is also imperfect, because we don't really know how far they trapped + * the croak(), and when we were recursing, we won't be able to clean anything + * but the topmost context stacked. + */ + +#define CROAK(x) do { cxt->dirty = 1; croak x; } while (0) + +/* + * End of "thread-safe" related definitions. + */ + +/* + * key buffer handling + */ +#define kbuf (cxt->keybuf).arena +#define ksiz (cxt->keybuf).asiz +#define KBUFINIT() do { \ + if (!kbuf) { \ + TRACEME(("** allocating kbuf of 128 bytes")); \ + New(10003, kbuf, 128, char); \ + ksiz = 128; \ + } \ +} while (0) +#define KBUFCHK(x) do { \ + if (x >= ksiz) { \ + TRACEME(("** extending kbuf to %d bytes", x+1)); \ + Renew(kbuf, x+1, char); \ + ksiz = x+1; \ + } \ +} while (0) + +/* + * memory buffer handling + */ +#define mbase (cxt->membuf).arena +#define msiz (cxt->membuf).asiz +#define mptr (cxt->membuf).aptr +#define mend (cxt->membuf).aend + +#define MGROW (1 << 13) +#define MMASK (MGROW - 1) + +#define round_mgrow(x) \ + ((unsigned long) (((unsigned long) (x) + MMASK) & ~MMASK)) +#define trunc_int(x) \ + ((unsigned long) ((unsigned long) (x) & ~(sizeof(int)-1))) +#define int_aligned(x) \ + ((unsigned long) (x) == trunc_int(x)) + +#define MBUF_INIT(x) do { \ + if (!mbase) { \ + TRACEME(("** allocating mbase of %d bytes", MGROW)); \ + New(10003, mbase, MGROW, char); \ + msiz = MGROW; \ + } \ + mptr = mbase; \ + if (x) \ + mend = mbase + x; \ + else \ + mend = mbase + msiz; \ +} while (0) + +#define MBUF_TRUNC(x) mptr = mbase + x +#define MBUF_SIZE() (mptr - mbase) + +/* + * Use SvPOKp(), because SvPOK() fails on tainted scalars. + * See store_scalar() for other usage of this workaround. + */ +#define MBUF_LOAD(v) do { \ + if (!SvPOKp(v)) \ + CROAK(("Not a scalar string")); \ + mptr = mbase = SvPV(v, msiz); \ + mend = mbase + msiz; \ +} while (0) + +#define MBUF_XTEND(x) do { \ + int nsz = (int) round_mgrow((x)+msiz); \ + int offset = mptr - mbase; \ + TRACEME(("** extending mbase to %d bytes", nsz)); \ + Renew(mbase, nsz, char); \ + msiz = nsz; \ + mptr = mbase + offset; \ + mend = mbase + nsz; \ +} while (0) + +#define MBUF_CHK(x) do { \ + if ((mptr + (x)) > mend) \ + MBUF_XTEND(x); \ +} while (0) + +#define MBUF_GETC(x) do { \ + if (mptr < mend) \ + x = (int) (unsigned char) *mptr++; \ + else \ + return (SV *) 0; \ +} while (0) + +#define MBUF_GETINT(x) do { \ + if ((mptr + sizeof(int)) <= mend) { \ + if (int_aligned(mptr)) \ + x = *(int *) mptr; \ + else \ + memcpy(&x, mptr, sizeof(int)); \ + mptr += sizeof(int); \ + } else \ + return (SV *) 0; \ +} while (0) + +#define MBUF_READ(x,s) do { \ + if ((mptr + (s)) <= mend) { \ + memcpy(x, mptr, s); \ + mptr += s; \ + } else \ + return (SV *) 0; \ +} while (0) + +#define MBUF_SAFEREAD(x,s,z) do { \ + if ((mptr + (s)) <= mend) { \ + memcpy(x, mptr, s); \ + mptr += s; \ + } else { \ + sv_free(z); \ + return (SV *) 0; \ + } \ +} while (0) + +#define MBUF_PUTC(c) do { \ + if (mptr < mend) \ + *mptr++ = (char) c; \ + else { \ + MBUF_XTEND(1); \ + *mptr++ = (char) c; \ + } \ +} while (0) + +#define MBUF_PUTINT(i) do { \ + MBUF_CHK(sizeof(int)); \ + if (int_aligned(mptr)) \ + *(int *) mptr = i; \ + else \ + memcpy(mptr, &i, sizeof(int)); \ + mptr += sizeof(int); \ +} while (0) + +#define MBUF_WRITE(x,s) do { \ + MBUF_CHK(s); \ + memcpy(mptr, x, s); \ + mptr += s; \ +} while (0) + +/* + * LOW_32BITS + * + * Keep only the low 32 bits of a pointer (used for tags, which are not + * really pointers). + */ + +#if PTRSIZE <= 4 +#define LOW_32BITS(x) ((I32) (x)) +#else +#if BYTEORDER == 0x87654321 +#define LOW_32BITS(x) ((I32) ((unsigned long) (x) & 0xffffffff00000000UL)) +#else /* BYTEORDER == 0x12345678 */ +#define LOW_32BITS(x) ((I32) ((unsigned long) (x) & 0xffffffffUL)) +#endif +#endif + +/* + * Possible return values for sv_type(). + */ + +#define svis_REF 0 +#define svis_SCALAR 1 +#define svis_ARRAY 2 +#define svis_HASH 3 +#define svis_TIED 4 +#define svis_TIED_ITEM 5 +#define svis_OTHER 6 + +/* + * Flags for SX_HOOK. + */ + +#define SHF_TYPE_MASK 0x03 +#define SHF_LARGE_CLASSLEN 0x04 +#define SHF_LARGE_STRLEN 0x08 +#define SHF_LARGE_LISTLEN 0x10 +#define SHF_IDX_CLASSNAME 0x20 +#define SHF_NEED_RECURSE 0x40 +#define SHF_HAS_LIST 0x80 + +/* + * Types for SX_HOOK (2 bits). + */ + +#define SHT_SCALAR 0 +#define SHT_ARRAY 1 +#define SHT_HASH 2 + +/* + * Before 0.6, the magic string was "perl-store" (binary version number 0). + * + * Since 0.6 introduced many binary incompatibilities, the magic string has + * been changed to "pst0" to allow an old image to be properly retrieved by + * a newer Storable, but ensure a newer image cannot be retrieved with an + * older version. + * + * At 0.7, objects are given the ability to serialize themselves, and the + * set of markers is extended, backward compatibility is not jeopardized, + * so the binary version number could have remained unchanged. To correctly + * spot errors if a file making use of 0.7-specific extensions is given to + * 0.6 for retrieval, the binary version was moved to "2". And I'm introducing + * a "minor" version, to better track this kind of evolution from now on. + * + */ +static char old_magicstr[] = "perl-store"; /* Magic number before 0.6 */ +static char magicstr[] = "pst0"; /* Used as a magic number */ + +#define STORABLE_BIN_MAJOR 2 /* Binary major "version" */ +#define STORABLE_BIN_MINOR 1 /* Binary minor "version" */ + +/* + * Useful store shortcuts... + */ + +#define PUTMARK(x) do { \ + if (!cxt->fio) \ + MBUF_PUTC(x); \ + else if (PerlIO_putc(cxt->fio, x) == EOF) \ + return -1; \ +} while (0) + +#ifdef HAS_HTONL +#define WLEN(x) do { \ + if (cxt->netorder) { \ + int y = (int) htonl(x); \ + if (!cxt->fio) \ + MBUF_PUTINT(y); \ + else if (PerlIO_write(cxt->fio, &y, sizeof(y)) != sizeof(y)) \ + return -1; \ + } else { \ + if (!cxt->fio) \ + MBUF_PUTINT(x); \ + else if (PerlIO_write(cxt->fio, &x, sizeof(x)) != sizeof(x)) \ + return -1; \ + } \ +} while (0) +#else +#define WLEN(x) do { \ + if (!cxt->fio) \ + MBUF_PUTINT(x); \ + else if (PerlIO_write(cxt->fio, &x, sizeof(x)) != sizeof(x)) \ + return -1; \ + } while (0) +#endif + +#define WRITE(x,y) do { \ + if (!cxt->fio) \ + MBUF_WRITE(x,y); \ + else if (PerlIO_write(cxt->fio, x, y) != y) \ + return -1; \ + } while (0) + +#define STORE_SCALAR(pv, len) do { \ + if (len <= LG_SCALAR) { \ + unsigned char clen = (unsigned char) len; \ + PUTMARK(SX_SCALAR); \ + PUTMARK(clen); \ + if (len) \ + WRITE(pv, len); \ + } else { \ + PUTMARK(SX_LSCALAR); \ + WLEN(len); \ + WRITE(pv, len); \ + } \ +} while (0) + +/* + * Store undef in arrays and hashes without recursing through store(). + */ +#define STORE_UNDEF() do { \ + cxt->tagnum++; \ + PUTMARK(SX_UNDEF); \ +} while (0) + +/* + * Useful retrieve shortcuts... + */ + +#define GETCHAR() \ + (cxt->fio ? PerlIO_getc(cxt->fio) : (mptr >= mend ? EOF : (int) *mptr++)) + +#define GETMARK(x) do { \ + if (!cxt->fio) \ + MBUF_GETC(x); \ + else if ((x = PerlIO_getc(cxt->fio)) == EOF) \ + return (SV *) 0; \ +} while (0) + +#ifdef HAS_NTOHL +#define RLEN(x) do { \ + if (!cxt->fio) \ + MBUF_GETINT(x); \ + else if (PerlIO_read(cxt->fio, &x, sizeof(x)) != sizeof(x)) \ + return (SV *) 0; \ + if (cxt->netorder) \ + x = (int) ntohl(x); \ +} while (0) +#else +#define RLEN(x) do { \ + if (!cxt->fio) \ + MBUF_GETINT(x); \ + else if (PerlIO_read(cxt->fio, &x, sizeof(x)) != sizeof(x)) \ + return (SV *) 0; \ +} while (0) +#endif + +#define READ(x,y) do { \ + if (!cxt->fio) \ + MBUF_READ(x, y); \ + else if (PerlIO_read(cxt->fio, x, y) != y) \ + return (SV *) 0; \ +} while (0) + +#define SAFEREAD(x,y,z) do { \ + if (!cxt->fio) \ + MBUF_SAFEREAD(x,y,z); \ + else if (PerlIO_read(cxt->fio, x, y) != y) { \ + sv_free(z); \ + return (SV *) 0; \ + } \ +} while (0) + +/* + * This macro is used at retrieve time, to remember where object 'y', bearing a + * given tag 'tagnum', has been retrieved. Next time we see an SX_OBJECT marker, + * we'll therefore know where it has been retrieved and will be able to + * share the same reference, as in the original stored memory image. + */ +#define SEEN(y) do { \ + if (!y) \ + return (SV *) 0; \ + if (av_store(cxt->aseen, cxt->tagnum++, SvREFCNT_inc(y)) == 0) \ + return (SV *) 0; \ + TRACEME(("aseen(#%d) = 0x%lx (refcnt=%d)", cxt->tagnum-1, \ + (unsigned long) y, SvREFCNT(y)-1)); \ +} while (0) + +/* + * Bless `s' in `p', via a temporary reference, required by sv_bless(). + */ +#define BLESS(s,p) do { \ + SV *ref; \ + HV *stash; \ + TRACEME(("blessing 0x%lx in %s", (unsigned long) (s), (p))); \ + stash = gv_stashpv((p), TRUE); \ + ref = newRV_noinc(s); \ + (void) sv_bless(ref, stash); \ + SvRV(ref) = 0; \ + SvREFCNT_dec(ref); \ +} while (0) + +static int store(); +static SV *retrieve(); + +/* + * Dynamic dispatching table for SV store. + */ + +static int store_ref(stcxt_t *cxt, SV *sv); +static int store_scalar(stcxt_t *cxt, SV *sv); +static int store_array(stcxt_t *cxt, AV *av); +static int store_hash(stcxt_t *cxt, HV *hv); +static int store_tied(stcxt_t *cxt, SV *sv); +static int store_tied_item(stcxt_t *cxt, SV *sv); +static int store_other(stcxt_t *cxt, SV *sv); + +static int (*sv_store[])() = { + store_ref, /* svis_REF */ + store_scalar, /* svis_SCALAR */ + store_array, /* svis_ARRAY */ + store_hash, /* svis_HASH */ + store_tied, /* svis_TIED */ + store_tied_item, /* svis_TIED_ITEM */ + store_other, /* svis_OTHER */ +}; + +#define SV_STORE(x) (*sv_store[x]) + +/* + * Dynamic dispatching tables for SV retrieval. + */ + +static SV *retrieve_lscalar(stcxt_t *cxt); +static SV *old_retrieve_array(stcxt_t *cxt); +static SV *old_retrieve_hash(stcxt_t *cxt); +static SV *retrieve_ref(stcxt_t *cxt); +static SV *retrieve_undef(stcxt_t *cxt); +static SV *retrieve_integer(stcxt_t *cxt); +static SV *retrieve_double(stcxt_t *cxt); +static SV *retrieve_byte(stcxt_t *cxt); +static SV *retrieve_netint(stcxt_t *cxt); +static SV *retrieve_scalar(stcxt_t *cxt); +static SV *retrieve_tied_array(stcxt_t *cxt); +static SV *retrieve_tied_hash(stcxt_t *cxt); +static SV *retrieve_tied_scalar(stcxt_t *cxt); +static SV *retrieve_other(stcxt_t *cxt); + +static SV *(*sv_old_retrieve[])() = { + 0, /* SX_OBJECT -- entry unused dynamically */ + retrieve_lscalar, /* SX_LSCALAR */ + old_retrieve_array, /* SX_ARRAY -- for pre-0.6 binaries */ + old_retrieve_hash, /* SX_HASH -- for pre-0.6 binaries */ + retrieve_ref, /* SX_REF */ + retrieve_undef, /* SX_UNDEF */ + retrieve_integer, /* SX_INTEGER */ + retrieve_double, /* SX_DOUBLE */ + retrieve_byte, /* SX_BYTE */ + retrieve_netint, /* SX_NETINT */ + retrieve_scalar, /* SX_SCALAR */ + retrieve_tied_array, /* SX_ARRAY */ + retrieve_tied_hash, /* SX_HASH */ + retrieve_tied_scalar, /* SX_SCALAR */ + retrieve_other, /* SX_SV_UNDEF not supported */ + retrieve_other, /* SX_SV_YES not supported */ + retrieve_other, /* SX_SV_NO not supported */ + retrieve_other, /* SX_BLESS not supported */ + retrieve_other, /* SX_IX_BLESS not supported */ + retrieve_other, /* SX_HOOK not supported */ + retrieve_other, /* SX_OVERLOADED not supported */ + retrieve_other, /* SX_TIED_KEY not supported */ + retrieve_other, /* SX_TIED_IDX not supported */ + retrieve_other, /* SX_ERROR */ +}; + +static SV *retrieve_array(stcxt_t *cxt); +static SV *retrieve_hash(stcxt_t *cxt); +static SV *retrieve_sv_undef(stcxt_t *cxt); +static SV *retrieve_sv_yes(stcxt_t *cxt); +static SV *retrieve_sv_no(stcxt_t *cxt); +static SV *retrieve_blessed(stcxt_t *cxt); +static SV *retrieve_idx_blessed(stcxt_t *cxt); +static SV *retrieve_hook(stcxt_t *cxt); +static SV *retrieve_overloaded(stcxt_t *cxt); +static SV *retrieve_tied_key(stcxt_t *cxt); +static SV *retrieve_tied_idx(stcxt_t *cxt); + +static SV *(*sv_retrieve[])() = { + 0, /* SX_OBJECT -- entry unused dynamically */ + retrieve_lscalar, /* SX_LSCALAR */ + retrieve_array, /* SX_ARRAY */ + retrieve_hash, /* SX_HASH */ + retrieve_ref, /* SX_REF */ + retrieve_undef, /* SX_UNDEF */ + retrieve_integer, /* SX_INTEGER */ + retrieve_double, /* SX_DOUBLE */ + retrieve_byte, /* SX_BYTE */ + retrieve_netint, /* SX_NETINT */ + retrieve_scalar, /* SX_SCALAR */ + retrieve_tied_array, /* SX_ARRAY */ + retrieve_tied_hash, /* SX_HASH */ + retrieve_tied_scalar, /* SX_SCALAR */ + retrieve_sv_undef, /* SX_SV_UNDEF */ + retrieve_sv_yes, /* SX_SV_YES */ + retrieve_sv_no, /* SX_SV_NO */ + retrieve_blessed, /* SX_BLESS */ + retrieve_idx_blessed, /* SX_IX_BLESS */ + retrieve_hook, /* SX_HOOK */ + retrieve_overloaded, /* SX_OVERLOAD */ + retrieve_tied_key, /* SX_TIED_KEY */ + retrieve_tied_idx, /* SX_TIED_IDX */ + retrieve_other, /* SX_ERROR */ +}; + +#define RETRIEVE(c,x) (*(c)->retrieve_vtbl[(x) >= SX_ERROR ? SX_ERROR : (x)]) + +static SV *mbuf2sv(); +static int store_blessed(); + +/*** + *** Context management. + ***/ + +/* + * init_perinterp + * + * Called once per "thread" (interpreter) to initialize some global context. + */ +static void init_perinterp() { + INIT_STCXT; + + cxt->netorder = 0; /* true if network order used */ + cxt->forgive_me = -1; /* whether to be forgiving... */ +} + +/* + * init_store_context + * + * Initialize a new store context for real recursion. + */ +static void init_store_context(cxt, f, optype, network_order) +stcxt_t *cxt; +PerlIO *f; +int optype; +int network_order; +{ + TRACEME(("init_store_context")); + + cxt->netorder = network_order; + cxt->forgive_me = -1; /* Fetched from perl if needed */ + cxt->canonical = -1; /* Idem */ + cxt->tagnum = -1; /* Reset tag numbers */ + cxt->classnum = -1; /* Reset class numbers */ + cxt->fio = f; /* Where I/O are performed */ + cxt->optype = optype; /* A store, or a deep clone */ + cxt->entry = 1; /* No recursion yet */ + + /* + * The `hseen' table is used to keep track of each SV stored and their + * associated tag numbers is special. It is "abused" because the + * values stored are not real SV, just integers cast to (SV *), + * which explains the freeing below. + * + * It is also one possible bottlneck to achieve good storing speed, + * so the "shared keys" optimization is turned off (unlikely to be + * of any use here), and the hash table is "pre-extended". Together, + * those optimizations increase the throughput by 12%. + */ + + cxt->hseen = newHV(); /* Table where seen objects are stored */ + HvSHAREKEYS_off(cxt->hseen); + + /* + * The following does not work well with perl5.004_04, and causes + * a core dump later on, in a completely unrelated spot, which + * makes me think there is a memory corruption going on. + * + * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking + * it below does not make any difference. It seems to work fine + * with perl5.004_68 but given the probable nature of the bug, + * that does not prove anything. + * + * It's a shame because increasing the amount of buckets raises + * store() throughput by 5%, but until I figure this out, I can't + * allow for this to go into production. + * + * It is reported fixed in 5.005, hence the #if. + */ +#if PATCHLEVEL < 5 +#define HBUCKETS 4096 /* Buckets for %hseen */ + HvMAX(cxt->hseen) = HBUCKETS - 1; /* keys %hseen = $HBUCKETS; */ +#endif + + /* + * The `hclass' hash uses the same settings as `hseen' above, but it is + * used to assign sequential tags (numbers) to class names for blessed + * objects. + * + * We turn the shared key optimization on. + */ + + cxt->hclass = newHV(); /* Where seen classnames are stored */ + +#if PATCHLEVEL < 5 + HvMAX(cxt->hclass) = HBUCKETS - 1; /* keys %hclass = $HBUCKETS; */ +#endif + + /* + * The `hook' hash table is used to keep track of the references on + * the STORABLE_freeze hook routines, when found in some class name. + * + * It is assumed that the inheritance tree will not be changed during + * storing, and that no new method will be dynamically created by the + * hooks. + */ + + cxt->hook = newHV(); /* Table where hooks are cached */ +} + +/* + * clean_store_context + * + * Clean store context by + */ +static void clean_store_context(cxt) +stcxt_t *cxt; +{ + HE *he; + + TRACEME(("clean_store_context")); + + ASSERT(cxt->optype & ST_STORE, ("was performing a store()")); + + /* + * Insert real values into hashes where we stored faked pointers. + */ + + hv_iterinit(cxt->hseen); + while (he = hv_iternext(cxt->hseen)) + HeVAL(he) = &PL_sv_undef; + + hv_iterinit(cxt->hclass); + while (he = hv_iternext(cxt->hclass)) + HeVAL(he) = &PL_sv_undef; + + /* + * And now dispose of them... + */ + + hv_undef(cxt->hseen); + sv_free((SV *) cxt->hseen); + + hv_undef(cxt->hclass); + sv_free((SV *) cxt->hclass); + + hv_undef(cxt->hook); + sv_free((SV *) cxt->hook); + + cxt->entry = 0; + cxt->dirty = 0; +} + +/* + * init_retrieve_context + * + * Initialize a new retrieve context for real recursion. + */ +static void init_retrieve_context(cxt, optype) +stcxt_t *cxt; +int optype; +{ + TRACEME(("init_retrieve_context")); + + /* + * The hook hash table is used to keep track of the references on + * the STORABLE_thaw hook routines, when found in some class name. + * + * It is assumed that the inheritance tree will not be changed during + * storing, and that no new method will be dynamically created by the + * hooks. + */ + + cxt->hook = newHV(); /* Caches STORABLE_thaw */ + + /* + * If retrieving an old binary version, the cxt->retrieve_vtbl variable + * was set to sv_old_retrieve. We'll need a hash table to keep track of + * the correspondance between the tags and the tag number used by the + * new retrieve routines. + */ + + cxt->hseen = (cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0; + + cxt->aseen = newAV(); /* Where retrieved objects are kept */ + cxt->aclass = newAV(); /* Where seen classnames are kept */ + cxt->tagnum = 0; /* Have to count objects... */ + cxt->classnum = 0; /* ...and class names as well */ + cxt->optype = optype; + cxt->entry = 1; /* No recursion yet */ +} + +/* + * clean_retrieve_context + * + * Clean retrieve context by + */ +static void clean_retrieve_context(cxt) +stcxt_t *cxt; +{ + TRACEME(("clean_retrieve_context")); + + ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()")); + + av_undef(cxt->aseen); + sv_free((SV *) cxt->aseen); + + av_undef(cxt->aclass); + sv_free((SV *) cxt->aclass); + + hv_undef(cxt->hook); + sv_free((SV *) cxt->hook); + + if (cxt->hseen) + sv_free((SV *) cxt->hseen); /* optional HV, for backward compat. */ + + cxt->entry = 0; + cxt->dirty = 0; +} + +/* + * clean_context + * + * A workaround for the CROAK bug: cleanup the last context. + */ +static void clean_context(cxt) +stcxt_t *cxt; +{ + TRACEME(("clean_context")); + + ASSERT(cxt->dirty, ("dirty context")); + + if (cxt->optype & ST_RETRIEVE) + clean_retrieve_context(cxt); + else + clean_store_context(cxt); +} + +/* + * allocate_context + * + * Allocate a new context and push it on top of the parent one. + * This new context is made globally visible via SET_STCXT(). + */ +static stcxt_t *allocate_context(parent_cxt) +stcxt_t *parent_cxt; +{ + stcxt_t *cxt; + + TRACEME(("allocate_context")); + + ASSERT(!parent_cxt->dirty, ("parent context clean")); + + Newz(0, cxt, 1, stcxt_t); + cxt->prev = parent_cxt; + SET_STCXT(cxt); + + return cxt; +} + +/* + * free_context + * + * Free current context, which cannot be the "root" one. + * Make the context underneath globally visible via SET_STCXT(). + */ +static void free_context(cxt) +stcxt_t *cxt; +{ + stcxt_t *prev = cxt->prev; + + TRACEME(("free_context")); + + ASSERT(!cxt->dirty, ("clean context")); + ASSERT(prev, ("not freeing root context")); + + if (kbuf) + Safefree(kbuf); + if (mbase) + Safefree(mbase); + + Safefree(cxt); + SET_STCXT(prev); +} + +/*** + *** Predicates. + ***/ + +/* + * is_storing + * + * Tells whether we're in the middle of a store operation. + */ +int is_storing() +{ + dSTCXT; + + return cxt->entry && (cxt->optype & ST_STORE); +} + +/* + * is_retrieving + * + * Tells whether we're in the middle of a retrieve operation. + */ +int is_retrieving() +{ + dSTCXT; + + return cxt->entry && (cxt->optype & ST_RETRIEVE); +} + +/* + * last_op_in_netorder + * + * Returns whether last operation was made using network order. + * + * This is typically out-of-band information that might prove useful + * to people wishing to convert native to network order data when used. + */ +int last_op_in_netorder() +{ + dSTCXT; + + return cxt->netorder; +} + +/*** + *** Hook lookup and calling routines. + ***/ + +/* + * pkg_fetchmeth + * + * A wrapper on gv_fetchmethod_autoload() which caches results. + * + * Returns the routine reference as an SV*, or null if neither the package + * nor its ancestors know about the method. + */ +static SV *pkg_fetchmeth(cache, pkg, method) +HV *cache; +HV *pkg; +char *method; +{ + GV *gv; + SV *sv; + SV **svh; + + /* + * The following code is the same as the one performed by UNIVERSAL::can + * in the Perl core. + */ + + gv = gv_fetchmethod_autoload(pkg, method, FALSE); + if (gv && isGV(gv)) { + sv = newRV((SV*) GvCV(gv)); + TRACEME(("%s->%s: 0x%lx", HvNAME(pkg), method, (unsigned long) sv)); + } else { + sv = newSVsv(&PL_sv_undef); + TRACEME(("%s->%s: not found", HvNAME(pkg), method)); + } + + /* + * Cache the result, ignoring failure: if we can't store the value, + * it just won't be cached. + */ + + (void) hv_store(cache, HvNAME(pkg), strlen(HvNAME(pkg)), sv, 0); + + return SvOK(sv) ? sv : (SV *) 0; +} + +/* + * pkg_hide + * + * Force cached value to be undef: hook ignored even if present. + */ +static void pkg_hide(cache, pkg, method) +HV *cache; +HV *pkg; +char *method; +{ + (void) hv_store(cache, + HvNAME(pkg), strlen(HvNAME(pkg)), newSVsv(&PL_sv_undef), 0); +} + +/* + * pkg_can + * + * Our own "UNIVERSAL::can", which caches results. + * + * Returns the routine reference as an SV*, or null if the object does not + * know about the method. + */ +static SV *pkg_can(cache, pkg, method) +HV *cache; +HV *pkg; +char *method; +{ + SV **svh; + SV *sv; + + TRACEME(("pkg_can for %s->%s", HvNAME(pkg), method)); + + /* + * Look into the cache to see whether we already have determined + * where the routine was, if any. + * + * NOTA BENE: we don't use `method' at all in our lookup, since we know + * that only one hook (i.e. always the same) is cached in a given cache. + */ + + svh = hv_fetch(cache, HvNAME(pkg), strlen(HvNAME(pkg)), FALSE); + if (svh) { + sv = *svh; + if (!SvOK(sv)) { + TRACEME(("cached %s->%s: not found", HvNAME(pkg), method)); + return (SV *) 0; + } else { + TRACEME(("cached %s->%s: 0x%lx", HvNAME(pkg), method, + (unsigned long) sv)); + return sv; + } + } + + TRACEME(("not cached yet")); + return pkg_fetchmeth(cache, pkg, method); /* Fetch and cache */ +} + +/* + * scalar_call + * + * Call routine as obj->hook(av) in scalar context. + * Propagates the single returned value if not called in void context. + */ +static SV *scalar_call(obj, hook, cloning, av, flags) +SV *obj; +SV *hook; +int cloning; +AV *av; +I32 flags; +{ + dSP; + int count; + SV *sv = 0; + + TRACEME(("scalar_call (cloning=%d)", cloning)); + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(obj); + XPUSHs(sv_2mortal(newSViv(cloning))); /* Cloning flag */ + if (av) { + SV **ary = AvARRAY(av); + int cnt = AvFILLp(av) + 1; + int i; + XPUSHs(ary[0]); /* Frozen string */ + for (i = 1; i < cnt; i++) { + TRACEME(("pushing arg #%d (0x%lx)...", i, (unsigned long) ary[i])); + XPUSHs(sv_2mortal(newRV(ary[i]))); + } + } + PUTBACK; + + TRACEME(("calling...")); + count = perl_call_sv(hook, flags); /* Go back to Perl code */ + TRACEME(("count = %d", count)); + + SPAGAIN; + + if (count) { + sv = POPs; + SvREFCNT_inc(sv); /* We're returning it, must stay alive! */ + } + + PUTBACK; + FREETMPS; + LEAVE; + + return sv; +} + +/* + * array_call + * + * Call routine obj->hook(cloning) in array context. + * Returns the list of returned values in an array. + */ +static AV *array_call(obj, hook, cloning) +SV *obj; +SV *hook; +int cloning; +{ + dSP; + int count; + AV *av; + int i; + + TRACEME(("arrary_call (cloning=%d), cloning")); + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(obj); /* Target object */ + XPUSHs(sv_2mortal(newSViv(cloning))); /* Cloning flag */ + PUTBACK; + + count = perl_call_sv(hook, G_ARRAY); /* Go back to Perl code */ + + SPAGAIN; + + av = newAV(); + for (i = count - 1; i >= 0; i--) { + SV *sv = POPs; + av_store(av, i, SvREFCNT_inc(sv)); + } + + PUTBACK; + FREETMPS; + LEAVE; + + return av; +} + +/* + * known_class + * + * Lookup the class name in the `hclass' table and either assign it a new ID + * or return the existing one, by filling in `classnum'. + * + * Return true if the class was known, false if the ID was just generated. + */ +static int known_class(cxt, name, len, classnum) +stcxt_t *cxt; +char *name; /* Class name */ +int len; /* Name length */ +I32 *classnum; +{ + SV **svh; + HV *hclass = cxt->hclass; + + TRACEME(("known_class (%s)", name)); + + /* + * Recall that we don't store pointers in this hash table, but tags. + * Therefore, we need LOW_32BITS() to extract the relevant parts. + */ + + svh = hv_fetch(hclass, name, len, FALSE); + if (svh) { + *classnum = LOW_32BITS(*svh); + return TRUE; + } + + /* + * Unknown classname, we need to record it. + * The (IV) cast below is for 64-bit machines, to avoid compiler warnings. + */ + + cxt->classnum++; + if (!hv_store(hclass, name, len, (SV*)(IV) cxt->classnum, 0)) + CROAK(("Unable to record new classname")); + + *classnum = cxt->classnum; + return FALSE; +} + +/*** + *** Sepcific store routines. + ***/ + +/* + * store_ref + * + * Store a reference. + * Layout is SX_REF <object> or SX_OVERLOAD <object>. + */ +static int store_ref(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + TRACEME(("store_ref (0x%lx)", (unsigned long) sv)); + + /* + * Follow reference, and check if target is overloaded. + */ + + sv = SvRV(sv); + + if (SvOBJECT(sv)) { + HV *stash = (HV *) SvSTASH(sv); + if (stash && Gv_AMG(stash)) { + TRACEME(("ref (0x%lx) is overloaded", (unsigned long) sv)); + PUTMARK(SX_OVERLOAD); + } else + PUTMARK(SX_REF); + } else + PUTMARK(SX_REF); + + return store(cxt, sv); +} + +/* + * store_scalar + * + * Store a scalar. + * + * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <lenght> <data> or SX_UNDEF. + * The <data> section is omitted if <length> is 0. + * + * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>. + * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>. + */ +static int store_scalar(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + IV iv; + char *pv; + STRLEN len; + U32 flags = SvFLAGS(sv); /* "cc -O" may put it in register */ + + TRACEME(("store_scalar (0x%lx)", (unsigned long) sv)); + + /* + * For efficiency, break the SV encapsulation by peaking at the flags + * directly without using the Perl macros to avoid dereferencing + * sv->sv_flags each time we wish to check the flags. + */ + + if (!(flags & SVf_OK)) { /* !SvOK(sv) */ + if (sv == &PL_sv_undef) { + TRACEME(("immortal undef")); + PUTMARK(SX_SV_UNDEF); + } else { + TRACEME(("undef at 0x%x", sv)); + PUTMARK(SX_UNDEF); + } + return 0; + } + + /* + * Always store the string representation of a scalar if it exists. + * Gisle Aas provided me with this test case, better than a long speach: + * + * perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)' + * SV = PVNV(0x80c8520) + * REFCNT = 1 + * FLAGS = (NOK,POK,pNOK,pPOK) + * IV = 0 + * NV = 0 + * PV = 0x80c83d0 "abc"\0 + * CUR = 3 + * LEN = 4 + * + * Write SX_SCALAR, length, followed by the actual data. + * + * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as + * appropriate, followed by the actual (binary) data. A double + * is written as a string if network order, for portability. + * + * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv). + * The reason is that when the scalar value is tainted, the SvNOK(sv) + * value is false. + * + * The test for a read-only scalar with both POK and NOK set is meant + * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the + * address comparison for each scalar we store. + */ + +#define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK) + + if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) { + if (sv == &PL_sv_yes) { + TRACEME(("immortal yes")); + PUTMARK(SX_SV_YES); + } else if (sv == &PL_sv_no) { + TRACEME(("immortal no")); + PUTMARK(SX_SV_NO); + } else { + pv = SvPV(sv, len); /* We know it's SvPOK */ + goto string; /* Share code below */ + } + } else if (flags & SVp_POK) { /* SvPOKp(sv) => string */ + pv = SvPV(sv, len); + + /* + * Will come here from below with pv and len set if double & netorder, + * or from above if it was readonly, POK and NOK but neither &PL_sv_yes + * nor &PL_sv_no. + */ + string: + + STORE_SCALAR(pv, len); + TRACEME(("ok (scalar 0x%lx '%s', length = %d)", + (unsigned long) sv, SvPVX(sv), len)); + + } else if (flags & SVp_NOK) { /* SvNOKp(sv) => double */ + double nv = SvNV(sv); + + /* + * Watch for number being an integer in disguise. + */ + if (nv == (double) (iv = I_V(nv))) { + TRACEME(("double %lf is actually integer %ld", nv, iv)); + goto integer; /* Share code below */ + } + + if (cxt->netorder) { + TRACEME(("double %lf stored as string", nv)); + pv = SvPV(sv, len); + goto string; /* Share code above */ + } + + PUTMARK(SX_DOUBLE); + WRITE(&nv, sizeof(nv)); + + TRACEME(("ok (double 0x%lx, value = %lf)", (unsigned long) sv, nv)); + + } else if (flags & SVp_IOK) { /* SvIOKp(sv) => integer */ + iv = SvIV(sv); + + /* + * Will come here from above with iv set if double is an integer. + */ + integer: + + /* + * Optimize small integers into a single byte, otherwise store as + * a real integer (converted into network order if they asked). + */ + + if (iv >= -128 && iv <= 127) { + unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */ + PUTMARK(SX_BYTE); + PUTMARK(siv); + TRACEME(("small integer stored as %d", siv)); + } else if (cxt->netorder) { + int niv; +#ifdef HAS_HTONL + niv = (int) htonl(iv); + TRACEME(("using network order")); +#else + niv = (int) iv; + TRACEME(("as-is for network order")); +#endif + PUTMARK(SX_NETINT); + WRITE(&niv, sizeof(niv)); + } else { + PUTMARK(SX_INTEGER); + WRITE(&iv, sizeof(iv)); + } + + TRACEME(("ok (integer 0x%lx, value = %d)", (unsigned long) sv, iv)); + + } else + CROAK(("Can't determine type of %s(0x%lx)", sv_reftype(sv, FALSE), + (unsigned long) sv)); + + return 0; /* Ok, no recursion on scalars */ +} + +/* + * store_array + * + * Store an array. + * + * Layout is SX_ARRAY <size> followed by each item, in increading index order. + * Each item is stored as <object>. + */ +static int store_array(cxt, av) +stcxt_t *cxt; +AV *av; +{ + SV **sav; + I32 len = av_len(av) + 1; + I32 i; + int ret; + + TRACEME(("store_array (0x%lx)", (unsigned long) av)); + + /* + * Signal array by emitting SX_ARRAY, followed by the array length. + */ + + PUTMARK(SX_ARRAY); + WLEN(len); + TRACEME(("size = %d", len)); + + /* + * Now store each item recursively. + */ + + for (i = 0; i < len; i++) { + sav = av_fetch(av, i, 0); + if (!sav) { + TRACEME(("(#%d) undef item", i)); + STORE_UNDEF(); + continue; + } + TRACEME(("(#%d) item", i)); + if (ret = store(cxt, *sav)) + return ret; + } + + TRACEME(("ok (array)")); + + return 0; +} + +/* + * sortcmp + * + * Sort two SVs + * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort. + */ +static int +sortcmp(a, b) +const void *a; +const void *b; +{ + return sv_cmp(*(SV * const *) a, *(SV * const *) b); +} + + +/* + * store_hash + * + * Store an hash table. + * + * Layout is SX_HASH <size> followed by each key/value pair, in random order. + * Values are stored as <object>. + * Keys are stored as <length> <data>, the <data> section being omitted + * if length is 0. + */ +static int store_hash(cxt, hv) +stcxt_t *cxt; +HV *hv; +{ + I32 len = HvKEYS(hv); + I32 i; + int ret = 0; + I32 riter; + HE *eiter; + + TRACEME(("store_hash (0x%lx)", (unsigned long) hv)); + + /* + * Signal hash by emitting SX_HASH, followed by the table length. + */ + + PUTMARK(SX_HASH); + WLEN(len); + TRACEME(("size = %d", len)); + + /* + * Save possible iteration state via each() on that table. + */ + + riter = HvRITER(hv); + eiter = HvEITER(hv); + hv_iterinit(hv); + + /* + * Now store each item recursively. + * + * If canonical is defined to some true value then store each + * key/value pair in sorted order otherwise the order is random. + * Canonical order is irrelevant when a deep clone operation is performed. + * + * Fetch the value from perl only once per store() operation, and only + * when needed. + */ + + if ( + !(cxt->optype & ST_CLONE) && (cxt->canonical == 1 || + (cxt->canonical < 0 && (cxt->canonical = + SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0))) + ) { + /* + * Storing in order, sorted by key. + * Run through the hash, building up an array of keys in a + * mortal array, sort the array and then run through the + * array. + */ + + AV *av = newAV(); + + TRACEME(("using canonical order")); + + for (i = 0; i < len; i++) { + HE *he = hv_iternext(hv); + SV *key = hv_iterkeysv(he); + av_store(av, AvFILLp(av)+1, key); /* av_push(), really */ + } + + qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp); + + for (i = 0; i < len; i++) { + char *keyval; + I32 keylen; + SV *key = av_shift(av); + HE *he = hv_fetch_ent(hv, key, 0, 0); + SV *val = HeVAL(he); + if (val == 0) + return 1; /* Internal error, not I/O error */ + + /* + * Store value first. + */ + + TRACEME(("(#%d) value 0x%lx", i, (unsigned long) val)); + + if (ret = store(cxt, val)) + goto out; + + /* + * Write key string. + * Keys are written after values to make sure retrieval + * can be optimal in terms of memory usage, where keys are + * read into a fixed unique buffer called kbuf. + * See retrieve_hash() for details. + */ + + keyval = hv_iterkey(he, &keylen); + TRACEME(("(#%d) key '%s'", i, keyval)); + WLEN(keylen); + if (keylen) + WRITE(keyval, keylen); + } + + /* + * Free up the temporary array + */ + + av_undef(av); + sv_free((SV *) av); + + } else { + + /* + * Storing in "random" order (in the order the keys are stored + * within the the hash). This is the default and will be faster! + */ + + for (i = 0; i < len; i++) { + char *key; + I32 len; + SV *val = hv_iternextsv(hv, &key, &len); + + if (val == 0) + return 1; /* Internal error, not I/O error */ + + /* + * Store value first. + */ + + TRACEME(("(#%d) value 0x%lx", i, (unsigned long) val)); + + if (ret = store(cxt, val)) + goto out; + + /* + * Write key string. + * Keys are written after values to make sure retrieval + * can be optimal in terms of memory usage, where keys are + * read into a fixed unique buffer called kbuf. + * See retrieve_hash() for details. + */ + + TRACEME(("(#%d) key '%s'", i, key)); + WLEN(len); + if (len) + WRITE(key, len); + } + } + + TRACEME(("ok (hash 0x%lx)", (unsigned long) hv)); + +out: + HvRITER(hv) = riter; /* Restore hash iterator state */ + HvEITER(hv) = eiter; + + return ret; +} + +/* + * store_tied + * + * When storing a tied object (be it a tied scalar, array or hash), we lay out + * a special mark, followed by the underlying tied object. For instance, when + * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where + * <hash object> stands for the serialization of the tied hash. + */ +static int store_tied(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + MAGIC *mg; + int ret = 0; + int svt = SvTYPE(sv); + char mtype = 'P'; + + TRACEME(("store_tied (0x%lx)", (unsigned long) sv)); + + /* + * We have a small run-time penalty here because we chose to factorise + * all tieds objects into the same routine, and not have a store_tied_hash, + * a store_tied_array, etc... + * + * Don't use a switch() statement, as most compilers don't optimize that + * well for 2/3 values. An if() else if() cascade is just fine. We put + * tied hashes first, as they are the most likely beasts. + */ + + if (svt == SVt_PVHV) { + TRACEME(("tied hash")); + PUTMARK(SX_TIED_HASH); /* Introduces tied hash */ + } else if (svt == SVt_PVAV) { + TRACEME(("tied array")); + PUTMARK(SX_TIED_ARRAY); /* Introduces tied array */ + } else { + TRACEME(("tied scalar")); + PUTMARK(SX_TIED_SCALAR); /* Introduces tied scalar */ + mtype = 'q'; + } + + if (!(mg = mg_find(sv, mtype))) + CROAK(("No magic '%c' found while storing tied %s", mtype, + (svt == SVt_PVHV) ? "hash" : + (svt == SVt_PVAV) ? "array" : "scalar")); + + /* + * The mg->mg_obj found by mg_find() above actually points to the + * underlying tied Perl object implementation. For instance, if the + * original SV was that of a tied array, then mg->mg_obj is an AV. + * + * Note that we store the Perl object as-is. We don't call its FETCH + * method along the way. At retrieval time, we won't call its STORE + * method either, but the tieing magic will be re-installed. In itself, + * that ensures that the tieing semantics are preserved since futher + * accesses on the retrieved object will indeed call the magic methods... + */ + + if (ret = store(cxt, mg->mg_obj)) + return ret; + + TRACEME(("ok (tied)")); + + return 0; +} + +/* + * store_tied_item + * + * Stores a reference to an item within a tied structure: + * + * . \$h{key}, stores both the (tied %h) object and 'key'. + * . \$a[idx], stores both the (tied @a) object and 'idx'. + * + * Layout is therefore either: + * SX_TIED_KEY <object> <key> + * SX_TIED_IDX <object> <index> + */ +static int store_tied_item(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + MAGIC *mg; + int ret; + + TRACEME(("store_tied_item (0x%lx)", (unsigned long) sv)); + + if (!(mg = mg_find(sv, 'p'))) + CROAK(("No magic 'p' found while storing reference to tied item")); + + /* + * We discriminate between \$h{key} and \$a[idx] via mg_ptr. + */ + + if (mg->mg_ptr) { + TRACEME(("store_tied_item: storing a ref to a tied hash item")); + PUTMARK(SX_TIED_KEY); + TRACEME(("store_tied_item: storing OBJ 0x%lx", + (unsigned long) mg->mg_obj)); + + if (ret = store(cxt, mg->mg_obj)) + return ret; + + TRACEME(("store_tied_item: storing PTR 0x%lx", + (unsigned long) mg->mg_ptr)); + + if (ret = store(cxt, (SV *) mg->mg_ptr)) + return ret; + } else { + I32 idx = mg->mg_len; + + TRACEME(("store_tied_item: storing a ref to a tied array item ")); + PUTMARK(SX_TIED_IDX); + TRACEME(("store_tied_item: storing OBJ 0x%lx", + (unsigned long) mg->mg_obj)); + + if (ret = store(cxt, mg->mg_obj)) + return ret; + + TRACEME(("store_tied_item: storing IDX %d", idx)); + + WLEN(idx); + } + + TRACEME(("ok (tied item)")); + + return 0; +} + +/* + * store_hook -- dispatched manually, not via sv_store[] + * + * The blessed SV is serialized by a hook. + * + * Simple Layout is: + * + * SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>] + * + * where <flags> indicates how long <len>, <len2> and <len3> are, whether + * the trailing part [] is present, the type of object (scalar, array or hash). + * There is also a bit which says how the classname is stored between: + * + * <len> <classname> + * <index> + * + * and when the <index> form is used (classname already seen), the "large + * classname" bit in <flags> indicates how large the <index> is. + * + * The serialized string returned by the hook is of length <len2> and comes + * next. It is an opaque string for us. + * + * Those <len3> object IDs which are listed last represent the extra references + * not directly serialized by the hook, but which are linked to the object. + * + * When recursion is mandated to resolve object-IDs not yet seen, we have + * instead, with <header> being flags with bits set to indicate the object type + * and that recursion was indeed needed: + * + * SX_HOOK <header> <object> <header> <object> <flags> + * + * that same header being repeated between serialized objects obtained through + * recursion, until we reach flags indicating no recursion, at which point + * we know we've resynchronized with a single layout, after <flags>. + */ +static int store_hook(cxt, sv, type, pkg, hook) +stcxt_t *cxt; +SV *sv; +HV *pkg; +SV *hook; +{ + I32 len; + char *class; + STRLEN len2; + SV *ref; + AV *av; + SV **ary; + int count; /* really len3 + 1 */ + unsigned char flags; + char *pv; + int i; + int recursed = 0; /* counts recursion */ + int obj_type; /* object type, on 2 bits */ + I32 classnum; + int ret; + int clone = cxt->optype & ST_CLONE; + + TRACEME(("store_hook, class \"%s\", tagged #%d", HvNAME(pkg), cxt->tagnum)); + + /* + * Determine object type on 2 bits. + */ + + switch (type) { + case svis_SCALAR: + obj_type = SHT_SCALAR; + break; + case svis_ARRAY: + obj_type = SHT_ARRAY; + break; + case svis_HASH: + obj_type = SHT_HASH; + break; + default: + CROAK(("Unexpected object type (%d) in store_hook()", type)); + } + flags = SHF_NEED_RECURSE | obj_type; + + class = HvNAME(pkg); + len = strlen(class); + + /* + * To call the hook, we need to fake a call like: + * + * $object->STORABLE_freeze($cloning); + * + * but we don't have the $object here. For instance, if $object is + * a blessed array, what we have in `sv' is the array, and we can't + * call a method on those. + * + * Therefore, we need to create a temporary reference to the object and + * make the call on that reference. + */ + + TRACEME(("about to call STORABLE_freeze on class %s", class)); + + ref = newRV_noinc(sv); /* Temporary reference */ + av = array_call(ref, hook, clone); /* @a = $object->STORABLE_freeze($c) */ + SvRV(ref) = 0; + SvREFCNT_dec(ref); /* Reclaim temporary reference */ + + count = AvFILLp(av) + 1; + TRACEME(("store_hook, array holds %d items", count)); + + /* + * If they return an empty list, it means they wish to ignore the + * hook for this class (and not just this instance -- that's for them + * to handle if they so wish). + * + * Simply disable the cached entry for the hook (it won't be recomputed + * since it's present in the cache) and recurse to store_blessed(). + */ + + if (!count) { + /* + * They must not change their mind in the middle of a serialization. + */ + + if (hv_fetch(cxt->hclass, class, len, FALSE)) + CROAK(("Too late to ignore hooks for %s class \"%s\"", + (cxt->optype & ST_CLONE) ? "cloning" : "storing", class)); + + pkg_hide(cxt->hook, pkg, "STORABLE_freeze"); + + ASSERT(!pkg_can(cxt->hook, pkg, "STORABLE_freeze"), ("hook invisible")); + TRACEME(("Ignoring STORABLE_freeze in class \"%s\"", class)); + + return store_blessed(cxt, sv, type, pkg); + } + + /* + * Get frozen string. + */ + + ary = AvARRAY(av); + pv = SvPV(ary[0], len2); + + /* + * Allocate a class ID if not already done. + */ + + if (!known_class(cxt, class, len, &classnum)) { + TRACEME(("first time we see class %s, ID = %d", class, classnum)); + classnum = -1; /* Mark: we must store classname */ + } else { + TRACEME(("already seen class %s, ID = %d", class, classnum)); + } + + /* + * If they returned more than one item, we need to serialize some + * extra references if not already done. + * + * Loop over the array, starting at postion #1, and for each item, + * ensure it is a reference, serialize it if not already done, and + * replace the entry with the tag ID of the corresponding serialized + * object. + * + * We CHEAT by not calling av_fetch() and read directly within the + * array, for speed. + */ + + for (i = 1; i < count; i++) { + SV **svh; + SV *xsv = ary[i]; + + if (!SvROK(xsv)) + CROAK(("Item #%d from hook in %s is not a reference", i, class)); + xsv = SvRV(xsv); /* Follow ref to know what to look for */ + + /* + * Look in hseen and see if we have a tag already. + * Serialize entry if not done already, and get its tag. + */ + + if (svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE)) + goto sv_seen; /* Avoid moving code too far to the right */ + + TRACEME(("listed object %d at 0x%lx is unknown", + i-1, (unsigned long) xsv)); + + /* + * We need to recurse to store that object and get it to be known + * so that we can resolve the list of object-IDs at retrieve time. + * + * The first time we do this, we need to emit the proper header + * indicating that we recursed, and what the type of object is (the + * object we're storing via a user-hook). Indeed, during retrieval, + * we'll have to create the object before recursing to retrieve the + * others, in case those would point back at that object. + */ + + /* [SX_HOOK] <flags> <object>*/ + if (!recursed++) + PUTMARK(SX_HOOK); + PUTMARK(flags); + + if (ret = store(cxt, xsv)) /* Given by hook for us to store */ + return ret; + + svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE); + if (!svh) + CROAK(("Could not serialize item #%d from hook in %s", i, class)); + + /* + * Replace entry with its tag (not a real SV, so no refcnt increment) + */ + + sv_seen: + SvREFCNT_dec(xsv); + ary[i] = *svh; + TRACEME(("listed object %d at 0x%lx is tag #%d", + i-1, (unsigned long) xsv, (I32) *svh)); + } + + /* + * Compute leading flags. + */ + + flags = obj_type; + if (((classnum == -1) ? len : classnum) > LG_SCALAR) + flags |= SHF_LARGE_CLASSLEN; + if (classnum != -1) + flags |= SHF_IDX_CLASSNAME; + if (len2 > LG_SCALAR) + flags |= SHF_LARGE_STRLEN; + if (count > 1) + flags |= SHF_HAS_LIST; + if (count > (LG_SCALAR + 1)) + flags |= SHF_LARGE_LISTLEN; + + /* + * We're ready to emit either serialized form: + * + * SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>] + * SX_HOOK <flags> <index> <len2> <str> [<len3> <object-IDs>] + * + * If we recursed, the SX_HOOK has already been emitted. + */ + + TRACEME(("SX_HOOK (recursed=%d) flags=0x%x class=%d len=%d len2=%d len3=%d", + recursed, flags, classnum, len, len2, count-1)); + + /* SX_HOOK <flags> */ + if (!recursed) + PUTMARK(SX_HOOK); + PUTMARK(flags); + + /* <len> <classname> or <index> */ + if (flags & SHF_IDX_CLASSNAME) { + if (flags & SHF_LARGE_CLASSLEN) + WLEN(classnum); + else { + unsigned char cnum = (unsigned char) classnum; + PUTMARK(cnum); + } + } else { + if (flags & SHF_LARGE_CLASSLEN) + WLEN(len); + else { + unsigned char clen = (unsigned char) len; + PUTMARK(clen); + } + WRITE(class, len); /* Final \0 is omitted */ + } + + /* <len2> <frozen-str> */ + if (flags & SHF_LARGE_STRLEN) + WLEN(len2); + else { + unsigned char clen = (unsigned char) len2; + PUTMARK(clen); + } + if (len2) + WRITE(pv, len2); /* Final \0 is omitted */ + + /* [<len3> <object-IDs>] */ + if (flags & SHF_HAS_LIST) { + int len3 = count - 1; + if (flags & SHF_LARGE_LISTLEN) + WLEN(len3); + else { + unsigned char clen = (unsigned char) len3; + PUTMARK(clen); + } + + /* + * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a + * real pointer, rather a tag number, well under the 32-bit limit. + */ + + for (i = 1; i < count; i++) { + I32 tagval = htonl(LOW_32BITS(ary[i])); + WRITE(&tagval, sizeof(I32)); + TRACEME(("object %d, tag #%d", i-1, ntohl(tagval))); + } + } + + /* + * Free the array. We need extra care for indices after 0, since they + * don't hold real SVs but integers cast. + */ + + if (count > 1) + AvFILLp(av) = 0; /* Cheat, nothing after 0 interests us */ + av_undef(av); + sv_free((SV *) av); + + return 0; +} + +/* + * store_blessed -- dispatched manually, not via sv_store[] + * + * Check whether there is a STORABLE_xxx hook defined in the class or in one + * of its ancestors. If there is, then redispatch to store_hook(); + * + * Otherwise, the blessed SV is stored using the following layout: + * + * SX_BLESS <flag> <len> <classname> <object> + * + * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending + * on the high-order bit in flag: if 1, then length follows on 4 bytes. + * Otherwise, the low order bits give the length, thereby giving a compact + * representation for class names less than 127 chars long. + * + * Each <classname> seen is remembered and indexed, so that the next time + * an object in the blessed in the same <classname> is stored, the following + * will be emitted: + * + * SX_IX_BLESS <flag> <index> <object> + * + * where <index> is the classname index, stored on 0 or 4 bytes depending + * on the high-order bit in flag (same encoding as above for <len>). + */ +static int store_blessed(cxt, sv, type, pkg) +stcxt_t *cxt; +SV *sv; +int type; +HV *pkg; +{ + SV *hook; + I32 len; + char *class; + I32 classnum; + + TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME(pkg))); + + /* + * Look for a hook for this blessed SV and redirect to store_hook() + * if needed. + */ + + hook = pkg_can(cxt->hook, pkg, "STORABLE_freeze"); + if (hook) + return store_hook(cxt, sv, type, pkg, hook); + + /* + * This is a blessed SV without any serialization hook. + */ + + class = HvNAME(pkg); + len = strlen(class); + + TRACEME(("blessed 0x%lx in %s, no hook: tagged #%d", + (unsigned long) sv, class, cxt->tagnum)); + + /* + * Determine whether it is the first time we see that class name (in which + * case it will be stored in the SX_BLESS form), or whether we already + * saw that class name before (in which case the SX_IX_BLESS form will be + * used). + */ + + if (known_class(cxt, class, len, &classnum)) { + TRACEME(("already seen class %s, ID = %d", class, classnum)); + PUTMARK(SX_IX_BLESS); + if (classnum <= LG_BLESS) { + unsigned char cnum = (unsigned char) classnum; + PUTMARK(cnum); + } else { + unsigned char flag = (unsigned char) 0x80; + PUTMARK(flag); + WLEN(classnum); + } + } else { + TRACEME(("first time we see class %s, ID = %d", class, classnum)); + PUTMARK(SX_BLESS); + if (len <= LG_BLESS) { + unsigned char clen = (unsigned char) len; + PUTMARK(clen); + } else { + unsigned char flag = (unsigned char) 0x80; + PUTMARK(flag); + WLEN(len); /* Don't BER-encode, this should be rare */ + } + WRITE(class, len); /* Final \0 is omitted */ + } + + /* + * Now emit the <object> part. + */ + + return SV_STORE(type)(cxt, sv); +} + +/* + * store_other + * + * We don't know how to store the item we reached, so return an error condition. + * (it's probably a GLOB, some CODE reference, etc...) + * + * If they defined the `forgive_me' variable at the Perl level to some + * true value, then don't croak, just warn, and store a placeholder string + * instead. + */ +static int store_other(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + STRLEN len; + static char buf[80]; + + TRACEME(("store_other")); + + /* + * Fetch the value from perl only once per store() operation. + */ + + if ( + cxt->forgive_me == 0 || + (cxt->forgive_me < 0 && !(cxt->forgive_me = + SvTRUE(perl_get_sv("Storable::forgive_me", TRUE)) ? 1 : 0)) + ) + CROAK(("Can't store %s items", sv_reftype(sv, FALSE))); + + warn("Can't store item %s(0x%lx)", + sv_reftype(sv, FALSE), (unsigned long) sv); + + /* + * Store placeholder string as a scalar instead... + */ + + (void) sprintf(buf, "You lost %s(0x%lx)\0", sv_reftype(sv, FALSE), + (unsigned long) sv); + + len = strlen(buf); + STORE_SCALAR(buf, len); + TRACEME(("ok (dummy \"%s\", length = %d)", buf, len)); + + return 0; +} + +/*** + *** Store driving routines + ***/ + +/* + * sv_type + * + * WARNING: partially duplicates Perl's sv_reftype for speed. + * + * Returns the type of the SV, identified by an integer. That integer + * may then be used to index the dynamic routine dispatch table. + */ +static int sv_type(sv) +SV *sv; +{ + switch (SvTYPE(sv)) { + case SVt_NULL: + case SVt_IV: + case SVt_NV: + /* + * No need to check for ROK, that can't be set here since there + * is no field capable of hodling the xrv_rv reference. + */ + return svis_SCALAR; + case SVt_PV: + case SVt_RV: + case SVt_PVIV: + case SVt_PVNV: + /* + * Starting from SVt_PV, it is possible to have the ROK flag + * set, the pointer to the other SV being either stored in + * the xrv_rv (in the case of a pure SVt_RV), or as the + * xpv_pv field of an SVt_PV and its heirs. + * + * However, those SV cannot be magical or they would be an + * SVt_PVMG at least. + */ + return SvROK(sv) ? svis_REF : svis_SCALAR; + case SVt_PVMG: + case SVt_PVLV: /* Workaround for perl5.004_04 "LVALUE" bug */ + if (SvRMAGICAL(sv) && (mg_find(sv, 'p'))) + return svis_TIED_ITEM; + /* FALL THROUGH */ + case SVt_PVBM: + if (SvRMAGICAL(sv) && (mg_find(sv, 'q'))) + return svis_TIED; + return SvROK(sv) ? svis_REF : svis_SCALAR; + case SVt_PVAV: + if (SvRMAGICAL(sv) && (mg_find(sv, 'P'))) + return svis_TIED; + return svis_ARRAY; + case SVt_PVHV: + if (SvRMAGICAL(sv) && (mg_find(sv, 'P'))) + return svis_TIED; + return svis_HASH; + default: + break; + } + + return svis_OTHER; +} + +/* + * store + * + * Recursively store objects pointed to by the sv to the specified file. + * + * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored + * object (one for which storage has started -- it may not be over if we have + * a self-referenced structure). This data set forms a stored <object>. + */ +static int store(cxt, sv) +stcxt_t *cxt; +SV *sv; +{ + SV **svh; + int ret; + SV *tag; + int type; + HV *hseen = cxt->hseen; + + TRACEME(("store (0x%lx)", (unsigned long) sv)); + + /* + * If object has already been stored, do not duplicate data. + * Simply emit the SX_OBJECT marker followed by its tag data. + * The tag is always written in network order. + * + * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a + * real pointer, rather a tag number (watch the insertion code below). + * That means it pobably safe to assume it is well under the 32-bit limit, + * and makes the truncation safe. + * -- RAM, 14/09/1999 + */ + + svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE); + if (svh) { + I32 tagval = htonl(LOW_32BITS(*svh)); + + TRACEME(("object 0x%lx seen as #%d", + (unsigned long) sv, ntohl(tagval))); + + PUTMARK(SX_OBJECT); + WRITE(&tagval, sizeof(I32)); + return 0; + } + + /* + * Allocate a new tag and associate it with the address of the sv being + * stored, before recursing... + * + * In order to avoid creating new SvIVs to hold the tagnum we just + * cast the tagnum to a SV pointer and store that in the hash. This + * means that we must clean up the hash manually afterwards, but gives + * us a 15% throughput increase. + * + * The (IV) cast below is for 64-bit machines, to avoid warnings from + * the compiler. Please, let me know if it does not work. + * -- RAM, 14/09/1999 + */ + + cxt->tagnum++; + if (!hv_store(hseen, + (char *) &sv, sizeof(sv), (SV*)(IV) cxt->tagnum, 0)) + return -1; + + /* + * Store `sv' and everything beneath it, using appropriate routine. + * Abort immediately if we get a non-zero status back. + */ + + type = sv_type(sv); + + TRACEME(("storing 0x%lx tag #%d, type %d...", + (unsigned long) sv, cxt->tagnum, type)); + + if (SvOBJECT(sv)) { + HV *pkg = SvSTASH(sv); + ret = store_blessed(cxt, sv, type, pkg); + } else + ret = SV_STORE(type)(cxt, sv); + + TRACEME(("%s (stored 0x%lx, refcnt=%d, %s)", + ret ? "FAILED" : "ok", (unsigned long) sv, + SvREFCNT(sv), sv_reftype(sv, FALSE))); + + return ret; +} + +/* + * magic_write + * + * Write magic number and system information into the file. + * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long> + * <sizeof ptr>] where <len> is the length of the byteorder hexa string. + * All size and lenghts are written as single characters here. + * + * Note that no byte ordering info is emitted when <network> is true, since + * integers will be emitted in network order in that case. + */ +static int magic_write(cxt) +stcxt_t *cxt; +{ + char buf[256]; /* Enough room for 256 hexa digits */ + unsigned char c; + int use_network_order = cxt->netorder; + + TRACEME(("magic_write on fd=%d", cxt->fio ? fileno(cxt->fio) : -1)); + + if (cxt->fio) + WRITE(magicstr, strlen(magicstr)); /* Don't write final \0 */ + + /* + * Starting with 0.6, the "use_network_order" byte flag is also used to + * indicate the version number of the binary image, encoded in the upper + * bits. The bit 0 is always used to indicate network order. + */ + + c = (unsigned char) + ((use_network_order ? 0x1 : 0x0) | (STORABLE_BIN_MAJOR << 1)); + PUTMARK(c); + + /* + * Starting with 0.7, a full byte is dedicated to the minor version of + * the binary format, which is incremented only when new markers are + * introduced, for instance, but when backward compatibility is preserved. + */ + + PUTMARK((unsigned char) STORABLE_BIN_MINOR); + + if (use_network_order) + return 0; /* Don't bother with byte ordering */ + + sprintf(buf, "%lx", (unsigned long) BYTEORDER); + c = (unsigned char) strlen(buf); + PUTMARK(c); + WRITE(buf, (unsigned int) c); /* Don't write final \0 */ + PUTMARK((unsigned char) sizeof(int)); + PUTMARK((unsigned char) sizeof(long)); + PUTMARK((unsigned char) sizeof(char *)); + + TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d)", + (unsigned long) BYTEORDER, (int) c, + sizeof(int), sizeof(long), sizeof(char *))); + + return 0; +} + +/* + * do_store + * + * Common code for store operations. + * + * When memory store is requested (f = NULL) and a non null SV* is given in + * `res', it is filled with a new SV created out of the memory buffer. + * + * It is required to provide a non-null `res' when the operation type is not + * dclone() and store() is performed to memory. + */ +static int do_store(f, sv, optype, network_order, res) +PerlIO *f; +SV *sv; +int optype; +int network_order; +SV **res; +{ + dSTCXT; + int status; + + ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res, + ("must supply result SV pointer for real recursion to memory")); + + TRACEME(("do_store (optype=%d, netorder=%d)", + optype, network_order)); + + optype |= ST_STORE; + + /* + * Workaround for CROAK leak: if they enter with a "dirty" context, + * free up memory for them now. + */ + + if (cxt->dirty) + clean_context(cxt); + + /* + * Now that STORABLE_xxx hooks exist, it is possible that they try to + * re-enter store() via the hooks. We need to stack contexts. + */ + + if (cxt->entry) + cxt = allocate_context(cxt); + + cxt->entry++; + + ASSERT(cxt->entry == 1, ("starting new recursion")); + ASSERT(!cxt->dirty, ("clean context")); + + /* + * Ensure sv is actually a reference. From perl, we called something + * like: + * pstore(FILE, \@array); + * so we must get the scalar value behing that reference. + */ + + if (!SvROK(sv)) + CROAK(("Not a reference")); + sv = SvRV(sv); /* So follow it to know what to store */ + + /* + * If we're going to store to memory, reset the buffer. + */ + + if (!f) + MBUF_INIT(0); + + /* + * Prepare context and emit headers. + */ + + init_store_context(cxt, f, optype, network_order); + + if (-1 == magic_write(cxt)) /* Emit magic and ILP info */ + return 0; /* Error */ + + /* + * Recursively store object... + */ + + ASSERT(is_storing(), ("within store operation")); + + status = store(cxt, sv); /* Just do it! */ + + /* + * If they asked for a memory store and they provided an SV pointer, + * make an SV string out of the buffer and fill their pointer. + * + * When asking for ST_REAL, it's MANDATORY for the caller to provide + * an SV, since context cleanup might free the buffer if we did recurse. + * (unless caller is dclone(), which is aware of that). + */ + + if (!cxt->fio && res) + *res = mbuf2sv(); + + /* + * Final cleanup. + * + * The "root" context is never freed, since it is meant to be always + * handy for the common case where no recursion occurs at all (i.e. + * we enter store() outside of any Storable code and leave it, period). + * We know it's the "root" context because there's nothing stacked + * underneath it. + * + * OPTIMIZATION: + * + * When deep cloning, we don't free the context: doing so would force + * us to copy the data in the memory buffer. Sicne we know we're + * about to enter do_retrieve... + */ + + clean_store_context(cxt); + if (cxt->prev && !(cxt->optype & ST_CLONE)) + free_context(cxt); + + TRACEME(("do_store returns %d", status)); + + return status == 0; +} + +/* + * pstore + * + * Store the transitive data closure of given object to disk. + * Returns 0 on error, a true value otherwise. + */ +int pstore(f, sv) +PerlIO *f; +SV *sv; +{ + TRACEME(("pstore")); + return do_store(f, sv, 0, FALSE, Nullsv); + +} + +/* + * net_pstore + * + * Same as pstore(), but network order is used for integers and doubles are + * emitted as strings. + */ +int net_pstore(f, sv) +PerlIO *f; +SV *sv; +{ + TRACEME(("net_pstore")); + return do_store(f, sv, 0, TRUE, Nullsv); +} + +/*** + *** Memory stores. + ***/ + +/* + * mbuf2sv + * + * Build a new SV out of the content of the internal memory buffer. + */ +static SV *mbuf2sv() +{ + dSTCXT; + + return newSVpv(mbase, MBUF_SIZE()); +} + +/* + * mstore + * + * Store the transitive data closure of given object to memory. + * Returns undef on error, a scalar value containing the data otherwise. + */ +SV *mstore(sv) +SV *sv; +{ + dSTCXT; + SV *out; + + TRACEME(("mstore")); + + if (!do_store(0, sv, 0, FALSE, &out)) + return &PL_sv_undef; + + return out; +} + +/* + * net_mstore + * + * Same as mstore(), but network order is used for integers and doubles are + * emitted as strings. + */ +SV *net_mstore(sv) +SV *sv; +{ + dSTCXT; + SV *out; + + TRACEME(("net_mstore")); + + if (!do_store(0, sv, 0, TRUE, &out)) + return &PL_sv_undef; + + return out; +} + +/*** + *** Specific retrieve callbacks. + ***/ + +/* + * retrieve_other + * + * Return an error via croak, since it is not possible that we get here + * under normal conditions, when facing a file produced via pstore(). + */ +static SV *retrieve_other(cxt) +stcxt_t *cxt; +{ + if ( + cxt->ver_major != STORABLE_BIN_MAJOR && + cxt->ver_minor != STORABLE_BIN_MINOR + ) { + CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d", + cxt->fio ? "file" : "string", + cxt->ver_major, cxt->ver_minor, + STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR)); + } else { + CROAK(("Corrupted storable %s (binary v%d.%d)", + cxt->fio ? "file" : "string", + cxt->ver_major, cxt->ver_minor)); + } + + return (SV *) 0; /* Just in case */ +} + +/* + * retrieve_idx_blessed + * + * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read. + * <index> can be coded on either 1 or 5 bytes. + */ +static SV *retrieve_idx_blessed(cxt) +stcxt_t *cxt; +{ + I32 idx; + char *class; + SV **sva; + SV *sv; + + TRACEME(("retrieve_idx_blessed (#%d)", cxt->tagnum)); + + GETMARK(idx); /* Index coded on a single char? */ + if (idx & 0x80) + RLEN(idx); + + /* + * Fetch classname in `aclass' + */ + + sva = av_fetch(cxt->aclass, idx, FALSE); + if (!sva) + CROAK(("Class name #%d should have been seen already", idx)); + + class = SvPVX(*sva); /* We know it's a PV, by construction */ + + TRACEME(("class ID %d => %s", idx, class)); + + /* + * Retrieve object and bless it. + */ + + sv = retrieve(cxt); + if (sv) + BLESS(sv, class); + + return sv; +} + +/* + * retrieve_blessed + * + * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read. + * <len> can be coded on either 1 or 5 bytes. + */ +static SV *retrieve_blessed(cxt) +stcxt_t *cxt; +{ + I32 len; + SV *sv; + char buf[LG_BLESS + 1]; /* Avoid malloc() if possible */ + char *class = buf; + + TRACEME(("retrieve_blessed (#%d)", cxt->tagnum)); + + /* + * Decode class name length and read that name. + * + * Short classnames have two advantages: their length is stored on one + * single byte, and the string can be read on the stack. + */ + + GETMARK(len); /* Length coded on a single char? */ + if (len & 0x80) { + RLEN(len); + TRACEME(("** allocating %d bytes for class name", len+1)); + New(10003, class, len+1, char); + } + READ(class, len); + class[len] = '\0'; /* Mark string end */ + + /* + * It's a new classname, otherwise it would have been an SX_IX_BLESS. + */ + + if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len))) + return (SV *) 0; + + /* + * Retrieve object and bless it. + */ + + sv = retrieve(cxt); + if (sv) { + BLESS(sv, class); + if (class != buf) + Safefree(class); + } + + return sv; +} + +/* + * retrieve_hook + * + * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>] + * with leading mark already read, as usual. + * + * When recursion was involved during serialization of the object, there + * is an unknown amount of serialized objects after the SX_HOOK mark. Until + * we reach a <flags> marker with the recursion bit cleared. + */ +static SV *retrieve_hook(cxt) +stcxt_t *cxt; +{ + I32 len; + char buf[LG_BLESS + 1]; /* Avoid malloc() if possible */ + char *class = buf; + unsigned int flags; + I32 len2; + SV *frozen; + I32 len3 = 0; + AV *av = 0; + SV *hook; + SV *sv; + SV *rv; + int obj_type; + I32 classname; + int clone = cxt->optype & ST_CLONE; + + TRACEME(("retrieve_hook (#%d)", cxt->tagnum)); + + /* + * Read flags, which tell us about the type, and whether we need to recurse. + */ + + GETMARK(flags); + + /* + * Create the (empty) object, and mark it as seen. + * + * This must be done now, because tags are incremented, and during + * serialization, the object tag was affected before recursion could + * take place. + */ + + obj_type = flags & SHF_TYPE_MASK; + switch (obj_type) { + case SHT_SCALAR: + sv = newSV(0); + break; + case SHT_ARRAY: + sv = (SV *) newAV(); + break; + case SHT_HASH: + sv = (SV *) newHV(); + break; + default: + return retrieve_other(cxt); /* Let it croak */ + } + SEEN(sv); + + /* + * Whilst flags tell us to recurse, do so. + * + * We don't need to remember the addresses returned by retrieval, because + * all the references will be obtained through indirection via the object + * tags in the object-ID list. + */ + + while (flags & SHF_NEED_RECURSE) { + TRACEME(("retrieve_hook recursing...")); + rv = retrieve(cxt); + if (!rv) + return (SV *) 0; + TRACEME(("retrieve_hook back with rv=0x%lx", (unsigned long) rv)); + GETMARK(flags); + } + + if (flags & SHF_IDX_CLASSNAME) { + SV **sva; + I32 idx; + + /* + * Fetch index from `aclass' + */ + + if (flags & SHF_LARGE_CLASSLEN) + RLEN(idx); + else + GETMARK(idx); + + sva = av_fetch(cxt->aclass, idx, FALSE); + if (!sva) + CROAK(("Class name #%d should have been seen already", idx)); + + class = SvPVX(*sva); /* We know it's a PV, by construction */ + TRACEME(("class ID %d => %s", idx, class)); + + } else { + /* + * Decode class name length and read that name. + * + * NOTA BENE: even if the length is stored on one byte, we don't read + * on the stack. Just like retrieve_blessed(), we limit the name to + * LG_BLESS bytes. This is an arbitrary decision. + */ + + if (flags & SHF_LARGE_CLASSLEN) + RLEN(len); + else + GETMARK(len); + + if (len > LG_BLESS) { + TRACEME(("** allocating %d bytes for class name", len+1)); + New(10003, class, len+1, char); + } + + READ(class, len); + class[len] = '\0'; /* Mark string end */ + + /* + * Record new classname. + */ + + if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len))) + return (SV *) 0; + } + + TRACEME(("class name: %s", class)); + + /* + * Decode user-frozen string length and read it in a SV. + * + * For efficiency reasons, we read data directly into the SV buffer. + * To understand that code, read retrieve_scalar() + */ + + if (flags & SHF_LARGE_STRLEN) + RLEN(len2); + else + GETMARK(len2); + + frozen = NEWSV(10002, len2); + if (len2) { + SAFEREAD(SvPVX(frozen), len2, frozen); + SvCUR_set(frozen, len2); + *SvEND(frozen) = '\0'; + } + (void) SvPOK_only(frozen); /* Validates string pointer */ + SvTAINT(frozen); + + TRACEME(("frozen string: %d bytes", len2)); + + /* + * Decode object-ID list length, if present. + */ + + if (flags & SHF_HAS_LIST) { + if (flags & SHF_LARGE_LISTLEN) + RLEN(len3); + else + GETMARK(len3); + if (len3) { + av = newAV(); + av_extend(av, len3 + 1); /* Leave room for [0] */ + AvFILLp(av) = len3; /* About to be filled anyway */ + } + } + + TRACEME(("has %d object IDs to link", len3)); + + /* + * Read object-ID list into array. + * Because we pre-extended it, we can cheat and fill it manually. + * + * We read object tags and we can convert them into SV* on the fly + * because we know all the references listed in there (as tags) + * have been already serialized, hence we have a valid correspondance + * between each of those tags and the recreated SV. + */ + + if (av) { + SV **ary = AvARRAY(av); + int i; + for (i = 1; i <= len3; i++) { /* We leave [0] alone */ + I32 tag; + SV **svh; + SV *xsv; + + READ(&tag, sizeof(I32)); + tag = ntohl(tag); + svh = av_fetch(cxt->aseen, tag, FALSE); + if (!svh) + CROAK(("Object #%d should have been retrieved already", tag)); + xsv = *svh; + ary[i] = SvREFCNT_inc(xsv); + } + } + + /* + * Bless the object and look up the STORABLE_thaw hook. + */ + + BLESS(sv, class); + hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw"); + if (!hook) + CROAK(("No STORABLE_thaw defined for objects of class %s", class)); + + /* + * If we don't have an `av' yet, prepare one. + * Then insert the frozen string as item [0]. + */ + + if (!av) { + av = newAV(); + av_extend(av, 1); + AvFILLp(av) = 0; + } + AvARRAY(av)[0] = SvREFCNT_inc(frozen); + + /* + * Call the hook as: + * + * $object->STORABLE_thaw($cloning, $frozen, @refs); + * + * where $object is our blessed (empty) object, $cloning is a boolean + * telling whether we're running a deep clone, $frozen is the frozen + * string the user gave us in his serializing hook, and @refs, which may + * be empty, is the list of extra references he returned along for us + * to serialize. + * + * In effect, the hook is an alternate creation routine for the class, + * the object itself being already created by the runtime. + */ + + TRACEME(("calling STORABLE_thaw on %s at 0x%lx (%d args)", + class, (unsigned long) sv, AvFILLp(av) + 1)); + + rv = newRV(sv); + (void) scalar_call(rv, hook, clone, av, G_SCALAR|G_DISCARD); + SvREFCNT_dec(rv); + + /* + * Final cleanup. + */ + + SvREFCNT_dec(frozen); + av_undef(av); + sv_free((SV *) av); + if (!(flags & SHF_IDX_CLASSNAME) && class != buf) + Safefree(class); + + return sv; +} + +/* + * retrieve_ref + * + * Retrieve reference to some other scalar. + * Layout is SX_REF <object>, with SX_REF already read. + */ +static SV *retrieve_ref(cxt) +stcxt_t *cxt; +{ + SV *rv; + SV *sv; + + TRACEME(("retrieve_ref (#%d)", cxt->tagnum)); + + /* + * We need to create the SV that holds the reference to the yet-to-retrieve + * object now, so that we may record the address in the seen table. + * Otherwise, if the object to retrieve references us, we won't be able + * to resolve the SX_OBJECT we'll see at that point! Hence we cannot + * do the retrieve first and use rv = newRV(sv) since it will be too late + * for SEEN() recording. + */ + + rv = NEWSV(10002, 0); + SEEN(rv); /* Will return if rv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + /* + * WARNING: breaks RV encapsulation. + * + * Now for the tricky part. We have to upgrade our existing SV, so that + * it is now an RV on sv... Again, we cheat by duplicating the code + * held in newSVrv(), since we already got our SV from retrieve(). + * + * We don't say: + * + * SvRV(rv) = SvREFCNT_inc(sv); + * + * here because the reference count we got from retrieve() above is + * already correct: if the object was retrieved from the file, then + * its reference count is one. Otherwise, if it was retrieved via + * an SX_OBJECT indication, a ref count increment was done. + */ + + sv_upgrade(rv, SVt_RV); + SvRV(rv) = sv; /* $rv = \$sv */ + SvROK_on(rv); + + TRACEME(("ok (retrieve_ref at 0x%lx)", (unsigned long) rv)); + + return rv; +} + +/* + * retrieve_overloaded + * + * Retrieve reference to some other scalar with overloading. + * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read. + */ +static SV *retrieve_overloaded(cxt) +stcxt_t *cxt; +{ + SV *rv; + SV *sv; + HV *stash; + + TRACEME(("retrieve_overloaded (#%d)", cxt->tagnum)); + + /* + * Same code as retrieve_ref(), duplicated to avoid extra call. + */ + + rv = NEWSV(10002, 0); + SEEN(rv); /* Will return if rv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + /* + * WARNING: breaks RV encapsulation. + */ + + sv_upgrade(rv, SVt_RV); + SvRV(rv) = sv; /* $rv = \$sv */ + SvROK_on(rv); + + /* + * Restore overloading magic. + */ + + stash = (HV *) SvSTASH (sv); + if (!stash || !Gv_AMG(stash)) + CROAK(("Cannot restore overloading on %s(0x%lx)", sv_reftype(sv, FALSE), + (unsigned long) sv)); + + SvAMAGIC_on(rv); + + TRACEME(("ok (retrieve_overloaded at 0x%lx)", (unsigned long) rv)); + + return rv; +} + +/* + * retrieve_tied_array + * + * Retrieve tied array + * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read. + */ +static SV *retrieve_tied_array(cxt) +stcxt_t *cxt; +{ + SV *tv; + SV *sv; + + TRACEME(("retrieve_tied_array (#%d)", cxt->tagnum)); + + tv = NEWSV(10002, 0); + SEEN(tv); /* Will return if tv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + sv_upgrade(tv, SVt_PVAV); + AvREAL_off((AV *)tv); + sv_magic(tv, sv, 'P', Nullch, 0); + SvREFCNT_dec(sv); /* Undo refcnt inc from sv_magic() */ + + TRACEME(("ok (retrieve_tied_array at 0x%lx)", (unsigned long) tv)); + + return tv; +} + +/* + * retrieve_tied_hash + * + * Retrieve tied hash + * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read. + */ +static SV *retrieve_tied_hash(cxt) +stcxt_t *cxt; +{ + SV *tv; + SV *sv; + + TRACEME(("retrieve_tied_hash (#%d)", cxt->tagnum)); + + tv = NEWSV(10002, 0); + SEEN(tv); /* Will return if tv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + sv_upgrade(tv, SVt_PVHV); + sv_magic(tv, sv, 'P', Nullch, 0); + SvREFCNT_dec(sv); /* Undo refcnt inc from sv_magic() */ + + TRACEME(("ok (retrieve_tied_hash at 0x%lx)", (unsigned long) tv)); + + return tv; +} + +/* + * retrieve_tied_scalar + * + * Retrieve tied scalar + * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read. + */ +static SV *retrieve_tied_scalar(cxt) +stcxt_t *cxt; +{ + SV *tv; + SV *sv; + + TRACEME(("retrieve_tied_scalar (#%d)", cxt->tagnum)); + + tv = NEWSV(10002, 0); + SEEN(tv); /* Will return if rv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + sv_upgrade(tv, SVt_PVMG); + sv_magic(tv, sv, 'q', Nullch, 0); + SvREFCNT_dec(sv); /* Undo refcnt inc from sv_magic() */ + + TRACEME(("ok (retrieve_tied_scalar at 0x%lx)", (unsigned long) tv)); + + return tv; +} + +/* + * retrieve_tied_key + * + * Retrieve reference to value in a tied hash. + * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read. + */ +static SV *retrieve_tied_key(cxt) +stcxt_t *cxt; +{ + SV *tv; + SV *sv; + SV *key; + + TRACEME(("retrieve_tied_key (#%d)", cxt->tagnum)); + + tv = NEWSV(10002, 0); + SEEN(tv); /* Will return if tv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + key = retrieve(cxt); /* Retrieve <key> */ + if (!key) + return (SV *) 0; /* Failed */ + + sv_upgrade(tv, SVt_PVMG); + sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY); + SvREFCNT_dec(key); /* Undo refcnt inc from sv_magic() */ + SvREFCNT_dec(sv); /* Undo refcnt inc from sv_magic() */ + + return tv; +} + +/* + * retrieve_tied_idx + * + * Retrieve reference to value in a tied array. + * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read. + */ +static SV *retrieve_tied_idx(cxt) +stcxt_t *cxt; +{ + SV *tv; + SV *sv; + I32 idx; + + TRACEME(("retrieve_tied_idx (#%d)", cxt->tagnum)); + + tv = NEWSV(10002, 0); + SEEN(tv); /* Will return if tv is null */ + sv = retrieve(cxt); /* Retrieve <object> */ + if (!sv) + return (SV *) 0; /* Failed */ + + RLEN(idx); /* Retrieve <idx> */ + + sv_upgrade(tv, SVt_PVMG); + sv_magic(tv, sv, 'p', Nullch, idx); + SvREFCNT_dec(sv); /* Undo refcnt inc from sv_magic() */ + + return tv; +} + + +/* + * retrieve_lscalar + * + * Retrieve defined long (string) scalar. + * + * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read. + * The scalar is "long" in that <length> is larger than LG_SCALAR so it + * was not stored on a single byte. + */ +static SV *retrieve_lscalar(cxt) +stcxt_t *cxt; +{ + STRLEN len; + SV *sv; + + RLEN(len); + TRACEME(("retrieve_lscalar (#%d), len = %d", cxt->tagnum, len)); + + /* + * Allocate an empty scalar of the suitable length. + */ + + sv = NEWSV(10002, len); + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + /* + * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation. + * + * Now, for efficiency reasons, read data directly inside the SV buffer, + * and perform the SV final settings directly by duplicating the final + * work done by sv_setpv. Since we're going to allocate lots of scalars + * this way, it's worth the hassle and risk. + */ + + SAFEREAD(SvPVX(sv), len, sv); + SvCUR_set(sv, len); /* Record C string length */ + *SvEND(sv) = '\0'; /* Ensure it's null terminated anyway */ + (void) SvPOK_only(sv); /* Validate string pointer */ + SvTAINT(sv); /* External data cannot be trusted */ + + TRACEME(("large scalar len %d '%s'", len, SvPVX(sv))); + TRACEME(("ok (retrieve_lscalar at 0x%lx)", (unsigned long) sv)); + + return sv; +} + +/* + * retrieve_scalar + * + * Retrieve defined short (string) scalar. + * + * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read. + * The scalar is "short" so <length> is single byte. If it is 0, there + * is no <data> section. + */ +static SV *retrieve_scalar(cxt) +stcxt_t *cxt; +{ + int len; + SV *sv; + + GETMARK(len); + TRACEME(("retrieve_scalar (#%d), len = %d", cxt->tagnum, len)); + + /* + * Allocate an empty scalar of the suitable length. + */ + + sv = NEWSV(10002, len); + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + /* + * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation. + */ + + if (len == 0) { + /* + * newSV did not upgrade to SVt_PV so the scalar is undefined. + * To make it defined with an empty length, upgrade it now... + */ + sv_upgrade(sv, SVt_PV); + SvGROW(sv, 1); + *SvEND(sv) = '\0'; /* Ensure it's null terminated anyway */ + TRACEME(("ok (retrieve_scalar empty at 0x%lx)", (unsigned long) sv)); + } else { + /* + * Now, for efficiency reasons, read data directly inside the SV buffer, + * and perform the SV final settings directly by duplicating the final + * work done by sv_setpv. Since we're going to allocate lots of scalars + * this way, it's worth the hassle and risk. + */ + SAFEREAD(SvPVX(sv), len, sv); + SvCUR_set(sv, len); /* Record C string length */ + *SvEND(sv) = '\0'; /* Ensure it's null terminated anyway */ + TRACEME(("small scalar len %d '%s'", len, SvPVX(sv))); + } + + (void) SvPOK_only(sv); /* Validate string pointer */ + SvTAINT(sv); /* External data cannot be trusted */ + + TRACEME(("ok (retrieve_scalar at 0x%lx)", (unsigned long) sv)); + return sv; +} + +/* + * retrieve_integer + * + * Retrieve defined integer. + * Layout is SX_INTEGER <data>, whith SX_INTEGER already read. + */ +static SV *retrieve_integer(cxt) +stcxt_t *cxt; +{ + SV *sv; + IV iv; + + TRACEME(("retrieve_integer (#%d)", cxt->tagnum)); + + READ(&iv, sizeof(iv)); + sv = newSViv(iv); + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + TRACEME(("integer %d", iv)); + TRACEME(("ok (retrieve_integer at 0x%lx)", (unsigned long) sv)); + + return sv; +} + +/* + * retrieve_netint + * + * Retrieve defined integer in network order. + * Layout is SX_NETINT <data>, whith SX_NETINT already read. + */ +static SV *retrieve_netint(cxt) +stcxt_t *cxt; +{ + SV *sv; + int iv; + + TRACEME(("retrieve_netint (#%d)", cxt->tagnum)); + + READ(&iv, sizeof(iv)); +#ifdef HAS_NTOHL + sv = newSViv((int) ntohl(iv)); + TRACEME(("network integer %d", (int) ntohl(iv))); +#else + sv = newSViv(iv); + TRACEME(("network integer (as-is) %d", iv)); +#endif + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + TRACEME(("ok (retrieve_netint at 0x%lx)", (unsigned long) sv)); + + return sv; +} + +/* + * retrieve_double + * + * Retrieve defined double. + * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read. + */ +static SV *retrieve_double(cxt) +stcxt_t *cxt; +{ + SV *sv; + double nv; + + TRACEME(("retrieve_double (#%d)", cxt->tagnum)); + + READ(&nv, sizeof(nv)); + sv = newSVnv(nv); + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + TRACEME(("double %lf", nv)); + TRACEME(("ok (retrieve_double at 0x%lx)", (unsigned long) sv)); + + return sv; +} + +/* + * retrieve_byte + * + * Retrieve defined byte (small integer within the [-128, +127] range). + * Layout is SX_BYTE <data>, whith SX_BYTE already read. + */ +static SV *retrieve_byte(cxt) +stcxt_t *cxt; +{ + SV *sv; + int siv; + + TRACEME(("retrieve_byte (#%d)", cxt->tagnum)); + + GETMARK(siv); + TRACEME(("small integer read as %d", (unsigned char) siv)); + sv = newSViv((unsigned char) siv - 128); + SEEN(sv); /* Associate this new scalar with tag "tagnum" */ + + TRACEME(("byte %d", (unsigned char) siv - 128)); + TRACEME(("ok (retrieve_byte at 0x%lx)", (unsigned long) sv)); + + return sv; +} + +/* + * retrieve_undef + * + * Return the undefined value. + */ +static SV *retrieve_undef(cxt) +stcxt_t *cxt; +{ + SV* sv; + + TRACEME(("retrieve_undef")); + + sv = newSV(0); + SEEN(sv); + + return sv; +} + +/* + * retrieve_sv_undef + * + * Return the immortal undefined value. + */ +static SV *retrieve_sv_undef(cxt) +stcxt_t *cxt; +{ + SV *sv = &PL_sv_undef; + + TRACEME(("retrieve_sv_undef")); + + SEEN(sv); + return sv; +} + +/* + * retrieve_sv_yes + * + * Return the immortal yes value. + */ +static SV *retrieve_sv_yes(cxt) +stcxt_t *cxt; +{ + SV *sv = &PL_sv_yes; + + TRACEME(("retrieve_sv_yes")); + + SEEN(sv); + return sv; +} + +/* + * retrieve_sv_no + * + * Return the immortal no value. + */ +static SV *retrieve_sv_no(cxt) +stcxt_t *cxt; +{ + SV *sv = &PL_sv_no; + + TRACEME(("retrieve_sv_no")); + + SEEN(sv); + return sv; +} + +/* + * retrieve_array + * + * Retrieve a whole array. + * Layout is SX_ARRAY <size> followed by each item, in increading index order. + * Each item is stored as <object>. + * + * When we come here, SX_ARRAY has been read already. + */ +static SV *retrieve_array(cxt) +stcxt_t *cxt; +{ + I32 len; + I32 i; + AV *av; + SV *sv; + + TRACEME(("retrieve_array (#%d)", cxt->tagnum)); + + /* + * Read length, and allocate array, then pre-extend it. + */ + + RLEN(len); + TRACEME(("size = %d", len)); + av = newAV(); + SEEN(av); /* Will return if array not allocated nicely */ + if (len) + av_extend(av, len); + else + return (SV *) av; /* No data follow if array is empty */ + + /* + * Now get each item in turn... + */ + + for (i = 0; i < len; i++) { + TRACEME(("(#%d) item", i)); + sv = retrieve(cxt); /* Retrieve item */ + if (!sv) + return (SV *) 0; + if (av_store(av, i, sv) == 0) + return (SV *) 0; + } + + TRACEME(("ok (retrieve_array at 0x%lx)", (unsigned long) av)); + + return (SV *) av; +} + +/* + * retrieve_hash + * + * Retrieve a whole hash table. + * Layout is SX_HASH <size> followed by each key/value pair, in random order. + * Keys are stored as <length> <data>, the <data> section being omitted + * if length is 0. + * Values are stored as <object>. + * + * When we come here, SX_HASH has been read already. + */ +static SV *retrieve_hash(cxt) +stcxt_t *cxt; +{ + I32 len; + I32 size; + I32 i; + HV *hv; + SV *sv; + static SV *sv_h_undef = (SV *) 0; /* hv_store() bug */ + + TRACEME(("retrieve_hash (#%d)", cxt->tagnum)); + + /* + * Read length, allocate table. + */ + + RLEN(len); + TRACEME(("size = %d", len)); + hv = newHV(); + SEEN(hv); /* Will return if table not allocated properly */ + if (len == 0) + return (SV *) hv; /* No data follow if table empty */ + + /* + * Now get each key/value pair in turn... + */ + + for (i = 0; i < len; i++) { + /* + * Get value first. + */ + + TRACEME(("(#%d) value", i)); + sv = retrieve(cxt); + if (!sv) + return (SV *) 0; + + /* + * Get key. + * Since we're reading into kbuf, we must ensure we're not + * recursing between the read and the hv_store() where it's used. + * Hence the key comes after the value. + */ + + RLEN(size); /* Get key size */ + KBUFCHK(size); /* Grow hash key read pool if needed */ + if (size) + READ(kbuf, size); + kbuf[size] = '\0'; /* Mark string end, just in case */ + TRACEME(("(#%d) key '%s'", i, kbuf)); + + /* + * Enter key/value pair into hash table. + */ + + if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0) + return (SV *) 0; + } + + TRACEME(("ok (retrieve_hash at 0x%lx)", (unsigned long) hv)); + + return (SV *) hv; +} + +/* + * old_retrieve_array + * + * Retrieve a whole array in pre-0.6 binary format. + * + * Layout is SX_ARRAY <size> followed by each item, in increading index order. + * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes". + * + * When we come here, SX_ARRAY has been read already. + */ +static SV *old_retrieve_array(cxt) +stcxt_t *cxt; +{ + I32 len; + I32 i; + AV *av; + SV *sv; + int c; + + TRACEME(("old_retrieve_array (#%d)", cxt->tagnum)); + + /* + * Read length, and allocate array, then pre-extend it. + */ + + RLEN(len); + TRACEME(("size = %d", len)); + av = newAV(); + SEEN(av); /* Will return if array not allocated nicely */ + if (len) + av_extend(av, len); + else + return (SV *) av; /* No data follow if array is empty */ + + /* + * Now get each item in turn... + */ + + for (i = 0; i < len; i++) { + GETMARK(c); + if (c == SX_IT_UNDEF) { + TRACEME(("(#%d) undef item", i)); + continue; /* av_extend() already filled us with undef */ + } + if (c != SX_ITEM) + (void) retrieve_other(0); /* Will croak out */ + TRACEME(("(#%d) item", i)); + sv = retrieve(cxt); /* Retrieve item */ + if (!sv) + return (SV *) 0; + if (av_store(av, i, sv) == 0) + return (SV *) 0; + } + + TRACEME(("ok (old_retrieve_array at 0x%lx)", (unsigned long) av)); + + return (SV *) av; +} + +/* + * old_retrieve_hash + * + * Retrieve a whole hash table in pre-0.6 binary format. + * + * Layout is SX_HASH <size> followed by each key/value pair, in random order. + * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted + * if length is 0. + * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes". + * + * When we come here, SX_HASH has been read already. + */ +static SV *old_retrieve_hash(cxt) +stcxt_t *cxt; +{ + I32 len; + I32 size; + I32 i; + HV *hv; + SV *sv; + int c; + static SV *sv_h_undef = (SV *) 0; /* hv_store() bug */ + + TRACEME(("old_retrieve_hash (#%d)", cxt->tagnum)); + + /* + * Read length, allocate table. + */ + + RLEN(len); + TRACEME(("size = %d", len)); + hv = newHV(); + SEEN(hv); /* Will return if table not allocated properly */ + if (len == 0) + return (SV *) hv; /* No data follow if table empty */ + + /* + * Now get each key/value pair in turn... + */ + + for (i = 0; i < len; i++) { + /* + * Get value first. + */ + + GETMARK(c); + if (c == SX_VL_UNDEF) { + TRACEME(("(#%d) undef value", i)); + /* + * Due to a bug in hv_store(), it's not possible to pass + * &PL_sv_undef to hv_store() as a value, otherwise the + * associated key will not be creatable any more. -- RAM, 14/01/97 + */ + if (!sv_h_undef) + sv_h_undef = newSVsv(&PL_sv_undef); + sv = SvREFCNT_inc(sv_h_undef); + } else if (c == SX_VALUE) { + TRACEME(("(#%d) value", i)); + sv = retrieve(cxt); + if (!sv) + return (SV *) 0; + } else + (void) retrieve_other(0); /* Will croak out */ + + /* + * Get key. + * Since we're reading into kbuf, we must ensure we're not + * recursing between the read and the hv_store() where it's used. + * Hence the key comes after the value. + */ + + GETMARK(c); + if (c != SX_KEY) + (void) retrieve_other(0); /* Will croak out */ + RLEN(size); /* Get key size */ + KBUFCHK(size); /* Grow hash key read pool if needed */ + if (size) + READ(kbuf, size); + kbuf[size] = '\0'; /* Mark string end, just in case */ + TRACEME(("(#%d) key '%s'", i, kbuf)); + + /* + * Enter key/value pair into hash table. + */ + + if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0) + return (SV *) 0; + } + + TRACEME(("ok (retrieve_hash at 0x%lx)", (unsigned long) hv)); + + return (SV *) hv; +} + +/*** + *** Retrieval engine. + ***/ + +/* + * magic_check + * + * Make sure the stored data we're trying to retrieve has been produced + * on an ILP compatible system with the same byteorder. It croaks out in + * case an error is detected. [ILP = integer-long-pointer sizes] + * Returns null if error is detected, &PL_sv_undef otherwise. + * + * Note that there's no byte ordering info emitted when network order was + * used at store time. + */ +static SV *magic_check(cxt) +stcxt_t *cxt; +{ + char buf[256]; + char byteorder[256]; + int c; + int use_network_order; + int version_major; + int version_minor = 0; + + TRACEME(("magic_check")); + + /* + * The "magic number" is only for files, not when freezing in memory. + */ + + if (cxt->fio) { + STRLEN len = sizeof(magicstr) - 1; + STRLEN old_len; + + READ(buf, len); /* Not null-terminated */ + buf[len] = '\0'; /* Is now */ + + if (0 == strcmp(buf, magicstr)) + goto magic_ok; + + /* + * Try to read more bytes to check for the old magic number, which + * was longer. + */ + + old_len = sizeof(old_magicstr) - 1; + READ(&buf[len], old_len - len); + buf[old_len] = '\0'; /* Is now null-terminated */ + + if (strcmp(buf, old_magicstr)) + CROAK(("File is not a perl storable")); + } + +magic_ok: + /* + * Starting with 0.6, the "use_network_order" byte flag is also used to + * indicate the version number of the binary, and therefore governs the + * setting of sv_retrieve_vtbl. See magic_write(). + */ + + GETMARK(use_network_order); + version_major = use_network_order >> 1; + cxt->retrieve_vtbl = version_major ? sv_retrieve : sv_old_retrieve; + + TRACEME(("magic_check: netorder = 0x%x", use_network_order)); + + + /* + * Starting with 0.7 (binary major 2), a full byte is dedicated to the + * minor version of the protocol. See magic_write(). + */ + + if (version_major > 1) + GETMARK(version_minor); + + cxt->ver_major = version_major; + cxt->ver_minor = version_minor; + + TRACEME(("binary image version is %d.%d", version_major, version_minor)); + + /* + * Inter-operability sanity check: we can't retrieve something stored + * using a format more recent than ours, because we have no way to + * know what has changed, and letting retrieval go would mean a probable + * failure reporting a "corrupted" storable file. + */ + + if ( + version_major > STORABLE_BIN_MAJOR || + (version_major == STORABLE_BIN_MAJOR && + version_minor > STORABLE_BIN_MINOR) + ) + CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)", + version_major, version_minor, + STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR)); + + /* + * If they stored using network order, there's no byte ordering + * information to check. + */ + + if (cxt->netorder = (use_network_order & 0x1)) + return &PL_sv_undef; /* No byte ordering info */ + + sprintf(byteorder, "%lx", (unsigned long) BYTEORDER); + GETMARK(c); + READ(buf, c); /* Not null-terminated */ + buf[c] = '\0'; /* Is now */ + + if (strcmp(buf, byteorder)) + CROAK(("Byte order is not compatible")); + + GETMARK(c); /* sizeof(int) */ + if ((int) c != sizeof(int)) + CROAK(("Integer size is not compatible")); + + GETMARK(c); /* sizeof(long) */ + if ((int) c != sizeof(long)) + CROAK(("Long integer size is not compatible")); + + GETMARK(c); /* sizeof(char *) */ + if ((int) c != sizeof(char *)) + CROAK(("Pointer integer size is not compatible")); + + return &PL_sv_undef; /* OK */ +} + +/* + * retrieve + * + * Recursively retrieve objects from the specified file and return their + * root SV (which may be an AV or an HV for what we care). + * Returns null if there is a problem. + */ +static SV *retrieve(cxt) +stcxt_t *cxt; +{ + int type; + SV **svh; + SV *sv; + + TRACEME(("retrieve")); + + /* + * Grab address tag which identifies the object if we are retrieving + * an older format. Since the new binary format counts objects and no + * longer explicitely tags them, we must keep track of the correspondance + * ourselves. + * + * The following section will disappear one day when the old format is + * no longer supported, hence the final "goto" in the "if" block. + */ + + if (cxt->hseen) { /* Retrieving old binary */ + stag_t tag; + if (cxt->netorder) { + I32 nettag; + READ(&nettag, sizeof(I32)); /* Ordered sequence of I32 */ + tag = (stag_t) nettag; + } else + READ(&tag, sizeof(stag_t)); /* Original address of the SV */ + + GETMARK(type); + if (type == SX_OBJECT) { + I32 tagn; + svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE); + if (!svh) + CROAK(("Old tag 0x%x should have been mapped already", tag)); + tagn = SvIV(*svh); /* Mapped tag number computed earlier below */ + + /* + * The following code is common with the SX_OBJECT case below. + */ + + svh = av_fetch(cxt->aseen, tagn, FALSE); + if (!svh) + CROAK(("Object #%d should have been retrieved already", tagn)); + sv = *svh; + TRACEME(("has retrieved #%d at 0x%lx", tagn, (unsigned long) sv)); + SvREFCNT_inc(sv); /* One more reference to this same sv */ + return sv; /* The SV pointer where object was retrieved */ + } + + /* + * Map new object, but don't increase tagnum. This will be done + * by each of the retrieve_* functions when they call SEEN(). + * + * The mapping associates the "tag" initially present with a unique + * tag number. See test for SX_OBJECT above to see how this is perused. + */ + + if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag), + newSViv(cxt->tagnum), 0)) + return (SV *) 0; + + goto first_time; + } + + /* + * Regular post-0.6 binary format. + */ + +again: + GETMARK(type); + + TRACEME(("retrieve type = %d", type)); + + /* + * Are we dealing with an object we should have already retrieved? + */ + + if (type == SX_OBJECT) { + I32 tag; + READ(&tag, sizeof(I32)); + tag = ntohl(tag); + svh = av_fetch(cxt->aseen, tag, FALSE); + if (!svh) + CROAK(("Object #%d should have been retrieved already", tag)); + sv = *svh; + TRACEME(("had retrieved #%d at 0x%lx", tag, (unsigned long) sv)); + SvREFCNT_inc(sv); /* One more reference to this same sv */ + return sv; /* The SV pointer where object was retrieved */ + } + +first_time: /* Will disappear when support for old format is dropped */ + + /* + * Okay, first time through for this one. + */ + + sv = RETRIEVE(cxt, type)(cxt); + if (!sv) + return (SV *) 0; /* Failed */ + + /* + * Old binary formats (pre-0.7). + * + * Final notifications, ended by SX_STORED may now follow. + * Currently, the only pertinent notification to apply on the + * freshly retrieved object is either: + * SX_CLASS <char-len> <classname> for short classnames. + * SX_LG_CLASS <int-len> <classname> for larger one (rare!). + * Class name is then read into the key buffer pool used by + * hash table key retrieval. + */ + + if (cxt->ver_major < 2) { + while ((type = GETCHAR()) != SX_STORED) { + I32 len; + switch (type) { + case SX_CLASS: + GETMARK(len); /* Length coded on a single char */ + break; + case SX_LG_CLASS: /* Length coded on a regular integer */ + RLEN(len); + break; + case EOF: + default: + return (SV *) 0; /* Failed */ + } + KBUFCHK(len); /* Grow buffer as necessary */ + if (len) + READ(kbuf, len); + kbuf[len] = '\0'; /* Mark string end */ + BLESS(sv, kbuf); + } + } + + TRACEME(("ok (retrieved 0x%lx, refcnt=%d, %s)", (unsigned long) sv, + SvREFCNT(sv) - 1, sv_reftype(sv, FALSE))); + + return sv; /* Ok */ +} + +/* + * do_retrieve + * + * Retrieve data held in file and return the root object. + * Common routine for pretrieve and mretrieve. + */ +static SV *do_retrieve(f, in, optype) +PerlIO *f; +SV *in; +int optype; +{ + dSTCXT; + SV *sv; + struct extendable msave; /* Where potentially valid mbuf is saved */ + + TRACEME(("do_retrieve (optype = 0x%x)", optype)); + + optype |= ST_RETRIEVE; + + /* + * Sanity assertions for retrieve dispatch tables. + */ + + ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve), + ("old and new retrieve dispatch table have same size")); + ASSERT(sv_old_retrieve[SX_ERROR] == retrieve_other, + ("SX_ERROR entry correctly initialized in old dispatch table")); + ASSERT(sv_retrieve[SX_ERROR] == retrieve_other, + ("SX_ERROR entry correctly initialized in new dispatch table")); + + /* + * Workaround for CROAK leak: if they enter with a "dirty" context, + * free up memory for them now. + */ + + if (cxt->dirty) + clean_context(cxt); + + /* + * Now that STORABLE_xxx hooks exist, it is possible that they try to + * re-enter retrieve() via the hooks. + */ + + if (cxt->entry) + cxt = allocate_context(cxt); + + cxt->entry++; + + ASSERT(cxt->entry == 1, ("starting new recursion")); + ASSERT(!cxt->dirty, ("clean context")); + + /* + * Prepare context. + * + * Data is loaded into the memory buffer when f is NULL, unless `in' is + * also NULL, in which case we're expecting the data to already lie + * in the buffer (dclone case). + */ + + KBUFINIT(); /* Allocate hash key reading pool once */ + + if (!f && in) { + StructCopy(&cxt->membuf, &msave, struct extendable); + MBUF_LOAD(in); + } + + + /* + * Magic number verifications. + * + * This needs to be done before calling init_retrieve_context() + * since the format indication in the file are necessary to conduct + * some of the initializations. + */ + + cxt->fio = f; /* Where I/O are performed */ + + if (!magic_check(cxt)) + CROAK(("Magic number checking on storable %s failed", + cxt->fio ? "file" : "string")); + + TRACEME(("data stored in %s format", + cxt->netorder ? "net order" : "native")); + + init_retrieve_context(cxt, optype); + + ASSERT(is_retrieving(), ("within retrieve operation")); + + sv = retrieve(cxt); /* Recursively retrieve object, get root SV */ + + /* + * Final cleanup. + */ + + if (!f && in) + StructCopy(&msave, &cxt->membuf, struct extendable); + + /* + * The "root" context is never freed. + */ + + clean_retrieve_context(cxt); + if (cxt->prev) /* This context was stacked */ + free_context(cxt); /* It was not the "root" context */ + + /* + * Prepare returned value. + */ + + if (!sv) { + TRACEME(("retrieve ERROR")); + return &PL_sv_undef; /* Something went wrong, return undef */ + } + + TRACEME(("retrieve got %s(0x%lx)", + sv_reftype(sv, FALSE), (unsigned long) sv)); + + /* + * Backward compatibility with Storable-0.5@9 (which we know we + * are retrieving if hseen is non-null): don't create an extra RV + * for objects since we special-cased it at store time. + * + * Build a reference to the SV returned by pretrieve even if it is + * already one and not a scalar, for consistency reasons. + * + * NB: although context might have been cleaned, the value of `cxt->hseen' + * remains intact, and can be used as a flag. + */ + + if (cxt->hseen) { /* Was not handling overloading by then */ + SV *rv; + if (sv_type(sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv)) + return sv; + } + + /* + * If reference is overloaded, restore behaviour. + * + * NB: minor glitch here: normally, overloaded refs are stored specially + * so that we can croak when behaviour cannot be re-installed, and also + * avoid testing for overloading magic at each reference retrieval. + * + * Unfortunately, the root reference is implicitely stored, so we must + * check for possible overloading now. Furthermore, if we don't restore + * overloading, we cannot croak as if the original ref was, because we + * have no way to determine whether it was an overloaded ref or not in + * the first place. + * + * It's a pity that overloading magic is attached to the rv, and not to + * the underlying sv as blessing is. + */ + + if (SvOBJECT(sv)) { + HV *stash = (HV *) SvSTASH (sv); + SV *rv = newRV_noinc(sv); + if (stash && Gv_AMG(stash)) { + SvAMAGIC_on(rv); + TRACEME(("restored overloading on root reference")); + } + return rv; + } + + return newRV_noinc(sv); +} + +/* + * pretrieve + * + * Retrieve data held in file and return the root object, undef on error. + */ +SV *pretrieve(f) +PerlIO *f; +{ + TRACEME(("pretrieve")); + return do_retrieve(f, Nullsv, 0); +} + +/* + * mretrieve + * + * Retrieve data held in scalar and return the root object, undef on error. + */ +SV *mretrieve(sv) +SV *sv; +{ + TRACEME(("mretrieve")); + return do_retrieve(0, sv, 0); +} + +/*** + *** Deep cloning + ***/ + +/* + * dclone + * + * Deep clone: returns a fresh copy of the original referenced SV tree. + * + * This is achieved by storing the object in memory and restoring from + * there. Not that efficient, but it should be faster than doing it from + * pure perl anyway. + */ +SV *dclone(sv) +SV *sv; +{ + dSTCXT; + int size; + stcxt_t *real_context; + SV *out; + + TRACEME(("dclone")); + + /* + * Workaround for CROAK leak: if they enter with a "dirty" context, + * free up memory for them now. + */ + + if (cxt->dirty) + clean_context(cxt); + + /* + * do_store() optimizes for dclone by not freeing its context, should + * we need to allocate one because we're deep cloning from a hook. + */ + + if (!do_store(0, sv, ST_CLONE, FALSE, Nullsv)) + return &PL_sv_undef; /* Error during store */ + + /* + * Because of the above optimization, we have to refresh the context, + * since a new one could have been allocated and stacked by do_store(). + */ + + { dSTCXT; real_context = cxt; } /* Sub-block needed for macro */ + cxt = real_context; /* And we need this temporary... */ + + /* + * Now, `cxt' may refer to a new context. + */ + + ASSERT(!cxt->dirty, ("clean context")); + ASSERT(!cxt->entry, ("entry will not cause new context allocation")); + + size = MBUF_SIZE(); + TRACEME(("dclone stored %d bytes", size)); + + MBUF_INIT(size); + out = do_retrieve(0, Nullsv, ST_CLONE); /* Will free non-root context */ + + TRACEME(("dclone returns 0x%lx", (unsigned long) out)); + + return out; +} + +/*** + *** Glue with perl. + ***/ + +/* + * The Perl IO GV object distinguishes between input and output for sockets + * but not for plain files. To allow Storable to transparently work on + * plain files and sockets transparently, we have to ask xsubpp to fetch the + * right object for us. Hence the OutputStream and InputStream declarations. + * + * Before perl 5.004_05, those entries in the standard typemap are not + * defined in perl include files, so we do that here. + */ + +#ifndef OutputStream +#define OutputStream PerlIO * +#define InputStream PerlIO * +#endif /* !OutputStream */ + +MODULE = Storable PACKAGE = Storable + +PROTOTYPES: ENABLE + +BOOT: + init_perinterp(); + +int +pstore(f,obj) +OutputStream f +SV * obj + +int +net_pstore(f,obj) +OutputStream f +SV * obj + +SV * +mstore(obj) +SV * obj + +SV * +net_mstore(obj) +SV * obj + +SV * +pretrieve(f) +InputStream f + +SV * +mretrieve(sv) +SV * sv + +SV * +dclone(sv) +SV * sv + +int +last_op_in_netorder() + +int +is_storing() + +int +is_retrieving() + diff --git a/ext/Storable/patchlevel.h b/ext/Storable/patchlevel.h new file mode 100644 index 0000000000..e3d7670bc6 --- /dev/null +++ b/ext/Storable/patchlevel.h @@ -0,0 +1 @@ +#define PATCHLEVEL 2 |