diff options
author | Nicholas Clark <nick@ccl4.org> | 2009-02-09 15:22:27 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2009-02-09 15:22:27 +0000 |
commit | c3d8257849283b4b05f8df0c5e5c838c0daac733 (patch) | |
tree | 9830b24f5d7d630899807e29cd3293bada8f8e4f /ext/Hash-Util-FieldHash | |
parent | e6027527172a53f9f883343141fbbf1e30692407 (diff) | |
download | perl-c3d8257849283b4b05f8df0c5e5c838c0daac733.tar.gz |
Rename ext/Hash/Util/FieldHash to ext/Hash-Util-FieldHash
Diffstat (limited to 'ext/Hash-Util-FieldHash')
-rw-r--r-- | ext/Hash-Util-FieldHash/Changes | 29 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/FieldHash.xs | 490 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/Makefile.PL | 22 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm | 860 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/01_load.t | 57 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/02_function.t | 336 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/03_class.t | 118 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/04_thread.t | 70 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/05_perlhook.t | 215 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/10_hash.t | 118 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/11_hashassign.t | 321 | ||||
-rw-r--r-- | ext/Hash-Util-FieldHash/t/12_hashwarn.t | 62 |
12 files changed, 2698 insertions, 0 deletions
diff --git a/ext/Hash-Util-FieldHash/Changes b/ext/Hash-Util-FieldHash/Changes new file mode 100644 index 0000000000..77f1fd9f01 --- /dev/null +++ b/ext/Hash-Util-FieldHash/Changes @@ -0,0 +1,29 @@ +Revision history for Perl extension Hash::Util::FieldHash. + +0.01 Sat Jun 3 16:24:12 2006 + - original version; created by h2xs 1.23 with options + -A -g --skip-ppport -nHash::Util::FieldHash + Fri Jun 23 22:31:59 CEST 2006 + - accepted as v5.9.4 DEVEL28420 + + +0.02 Fri Apr 20 22:22:57 CEST 2007 + - Bugfix: string keys are now checked whether they represent + an object, so %fieldhash_clone = %fieldhash_orig works. + +1.01 Thu May 31 11:12:20 CEST 2007 + - Introduced magic id chaching after a suggestion by Jerry Hedden + - Added functions id, id_2obj, register, idhash, idhashes + + Sun Jun 17 15:10:45 CEST 2007 + - added tests for new functions + - pod partially re-written to describe the multi-level + interface + - updated pod part of lib/Hash/Util.pm + - release accepted by p5p + +1.02 Sat Jul 14 22:38:33 CEST 2007 + - prototype set to ($) for id() + - tests added for prototypes + - some cleanup in xs code + - small pod fixes diff --git a/ext/Hash-Util-FieldHash/FieldHash.xs b/ext/Hash-Util-FieldHash/FieldHash.xs new file mode 100644 index 0000000000..919a6891f0 --- /dev/null +++ b/ext/Hash-Util-FieldHash/FieldHash.xs @@ -0,0 +1,490 @@ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +/* support for Hash::Util::FieldHash, prefix HUF_ */ + +/* A Perl sub that returns a hashref to the object registry */ +#define HUF_OB_REG "Hash::Util::FieldHash::_ob_reg" +/* Identifier for PERL_MAGIC_ext magic */ +#define HUF_IDCACHE 0x4944 + +/* For global cache of object registry */ +#define MY_CXT_KEY "Hash::Util::FieldHash::_guts" XS_VERSION +typedef struct { + HV* ob_reg; /* Cache object registry */ +} my_cxt_t; +START_MY_CXT + +/* Inquire the object registry (a lexical hash) from perl */ +HV* HUF_get_ob_reg(void) { + dSP; + HV* ob_reg = NULL; + I32 items; + ENTER; + SAVETMPS; + + PUSHMARK(SP); + items = call_pv(HUF_OB_REG, G_SCALAR|G_NOARGS); + SPAGAIN; + + if (items == 1 && TOPs && SvROK(TOPs) && SvTYPE(SvRV(TOPs)) == SVt_PVHV) + ob_reg = (HV*)SvRV(POPs); + PUTBACK; + FREETMPS; + LEAVE; + + if (!ob_reg) + Perl_die(aTHX_ "Can't get object registry hash"); + return ob_reg; +} + +/* Deal with global context */ +#define HUF_INIT 1 +#define HUF_CLONE 0 +#define HUF_RESET -1 + +void HUF_global(I32 how) { + if (how == HUF_INIT) { + MY_CXT_INIT; + MY_CXT.ob_reg = HUF_get_ob_reg(); + } else if (how == HUF_CLONE) { + MY_CXT_CLONE; + MY_CXT.ob_reg = HUF_get_ob_reg(); + } else if (how == HUF_RESET) { + dMY_CXT; + MY_CXT.ob_reg = HUF_get_ob_reg(); + } +} + +/* Object id */ + +/* definition of id transformation */ +#define HUF_OBJ_ID(x) newSVuv(PTR2UV(x)) + +SV* HUF_obj_id(SV* obj) { + SV *item = SvRV(obj); + MAGIC *mg; + SV *id; + + /* Get cached object ID, if it exists */ + if (SvTYPE(item) >= SVt_PVMG) { + for ( mg = SvMAGIC(item); mg; mg = mg->mg_moremagic ) { + if ((mg->mg_type == PERL_MAGIC_ext) && + (mg->mg_private == HUF_IDCACHE) + ) { + return mg->mg_obj; + } + } + } + + /* Create an object ID, cache it */ + id = HUF_OBJ_ID(item); + mg = sv_magicext(item, id, PERL_MAGIC_ext, NULL, NULL, 0); + mg->mg_private = HUF_IDCACHE; + + /* Return the object ID */ + return id; +} + +/* set up uvar magic for any sv */ +void HUF_add_uvar_magic( + SV* sv, /* the sv to enchant, visible to get/set */ + I32(* val)(pTHX_ IV, SV*), /* "get" function */ + I32(* set)(pTHX_ IV, SV*), /* "set" function */ + I32 index, /* get/set will see this */ + SV* thing /* any associated info */ +) { + struct ufuncs uf; + uf.uf_val = val; + uf.uf_set = set; + uf.uf_index = index; + sv_magic(sv, thing, PERL_MAGIC_uvar, (char*)&uf, sizeof(uf)); +} + +/* Fetch the data container of a trigger */ +AV* HUF_get_trigger_content(SV* trigger) { + MAGIC* mg; + if (trigger && (mg = mg_find(trigger, PERL_MAGIC_uvar))) + return (AV*)mg->mg_obj; + return NULL; +} + +/* Delete an object from all field hashes it may occur in. Also delete + * the object's entry from the object registry. This function goes in + * the uf_set field of the uvar magic of a trigger. + */ +I32 HUF_destroy_obj(pTHX_ IV index, SV* trigger) { + /* Do nothing if the weakref wasn't undef'd. Also don't bother + * during global destruction. (MY_CXT.ob_reg is sometimes funny there) */ + if (!SvROK(trigger) && (!PL_in_clean_all)) { + dMY_CXT; + AV* cont = HUF_get_trigger_content(trigger); + SV* ob_id = *av_fetch(cont, 0, 0); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HE* ent; + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + SV* field_ref = HeVAL(ent); + SV* field = SvRV(field_ref); + hv_delete_ent((HV*)field, ob_id, 0, 0); + } + /* make it safe in case we must run in global clenaup, after all */ + if (PL_in_clean_all) + HUF_global(HUF_RESET); /* shoudn't be needed */ + hv_delete_ent(MY_CXT.ob_reg, ob_id, 0, 0); + } + return 0; +} + +/* Create a trigger for an object. The trigger is a magical SV + * that holds a weak ref to the object. The magic fires when the object + * expires and takes care of garbage collection in registred hashes. + * For that purpose, the magic structure holds the original id of + * the object, and a list (a hash, really) of hashes from which the + * object may * have to be deleted. The trigger is stored in the + * object registry and is also deleted when the object expires. + */ +SV* HUF_new_trigger(SV* obj, SV* ob_id) { + dMY_CXT; + SV* trigger = sv_rvweaken(newRV_inc(SvRV(obj))); + AV* cont = newAV(); + sv_2mortal((SV*)cont); + av_store(cont, 0, SvREFCNT_inc(ob_id)); + av_store(cont, 1, (SV*)newHV()); + HUF_add_uvar_magic(trigger, NULL, &HUF_destroy_obj, 0, (SV*)cont); + hv_store_ent(MY_CXT.ob_reg, ob_id, trigger, 0); + return trigger; +} + +/* retrieve a trigger for obj if one exists, return NULL otherwise */ +SV* HUF_ask_trigger(SV* ob_id) { + dMY_CXT; + HE* ent; + if (ent = hv_fetch_ent(MY_CXT.ob_reg, ob_id, 0, 0)) + return HeVAL(ent); + return NULL; +} + +/* get the trigger for an object, creating it if necessary */ +SV* HUF_get_trigger0(SV* obj, SV* ob_id) { + SV* trigger; + if (!(trigger = HUF_ask_trigger(ob_id))) + trigger = HUF_new_trigger(obj, ob_id); + return trigger; +} + +SV* HUF_get_trigger(SV* obj, SV* ob_id) { + SV* trigger = HUF_ask_trigger(ob_id); + if (!trigger) + trigger = HUF_new_trigger(obj, ob_id); + return( trigger); +} + +/* mark an object (trigger) as having been used with a field + (a clenup-liability) +*/ +void HUF_mark_field(SV* trigger, SV* field) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + SV* field_ref = newRV_inc(field); + UV field_addr = PTR2UV(field); + hv_store(field_tab, (char *)&field_addr, sizeof(field_addr), field_ref, 0); +} + +/* Determine, from the value of action, whether this call may create a new + * hash key */ +#define HUF_WOULD_CREATE_KEY(x) ((x) != HV_DELETE && ((x) & (HV_FETCH_ISSTORE | HV_FETCH_LVALUE))) + +/* The key exchange functions. They communicate with S_hv_magic_uvar_xkey + * in hv.c */ +I32 HUF_watch_key_safe(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv; + if (mg && (keysv = mg->mg_obj)) { + if (SvROK(keysv)) { /* ref key */ + SV* ob_id = HUF_obj_id(keysv); + mg->mg_obj = ob_id; /* key replacement */ + if (HUF_WOULD_CREATE_KEY(action)) { + SV* trigger = HUF_get_trigger(keysv, ob_id); + HUF_mark_field(trigger, field); + } + } else if (HUF_WOULD_CREATE_KEY(action)) { /* string key */ + /* registered as object id? */ + SV* trigger; + if ( trigger = HUF_ask_trigger(keysv)) + HUF_mark_field( trigger, field); + } + } else { + Perl_die(aTHX_ "Rogue call of 'HUF_watch_key_safe'"); + } + return 0; +} + +I32 HUF_watch_key_id(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv; + if (mg && (keysv = mg->mg_obj)) { + if (SvROK(keysv)) /* ref key */ + mg->mg_obj = HUF_obj_id(keysv); /* key replacement */ + } else { + Perl_die(aTHX_ "Rogue call of 'HUF_watch_key_id'"); + } + return 0; +} + +int HUF_func_2mode( I32(* val)(pTHX_ IV, SV*)) { + int ans = 0; + if (val == &HUF_watch_key_id) + ans = 1; + if (val == &HUF_watch_key_safe) + ans = 2; + return(ans); +} + +I32(* HUF_mode_2func( int mode))(pTHX_ IV, SV*) { + I32(* ans)(pTHX_ IV, SV*) = NULL; + switch (mode) { + case 1: + ans = &HUF_watch_key_id; + break; + case 2: + ans = &HUF_watch_key_safe; + break; + } + return(ans); +} + +/* see if something is a field hash */ +int HUF_get_status(HV* hash) { + int ans = 0; + if (hash && (SvTYPE(hash) == SVt_PVHV)) { + MAGIC* mg; + struct ufuncs* uf; + if ((mg = mg_find((SV*)hash, PERL_MAGIC_uvar)) && + (uf = (struct ufuncs *)mg->mg_ptr) && + (uf->uf_set == NULL) + ) { + ans = HUF_func_2mode(uf->uf_val); + } + } + return ans; +} + +/* Thread support. These routines are called by CLONE (and nothing else) */ + +/* Fix entries for one object in all field hashes */ +void HUF_fix_trigger(SV* trigger, SV* new_id) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HV* new_tab = newHV(); + HE* ent; + SV* old_id = *av_fetch(cont, 0, 0); + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + SV* field_ref = HeVAL(ent); + HV* field = (HV*)SvRV(field_ref); + UV field_addr = PTR2UV(field); + SV* val; + /* recreate field tab entry */ + hv_store(new_tab, (char *)&field_addr, sizeof(field_addr), SvREFCNT_inc(field_ref), 0); + /* recreate field entry, if any */ + if (val = hv_delete_ent(field, old_id, 0, 0)) + hv_store_ent(field, new_id, SvREFCNT_inc(val), 0); + } + /* update the trigger */ + av_store(cont, 0, SvREFCNT_inc(new_id)); + av_store(cont, 1, (SV*)new_tab); +} + +/* Go over object registry and fix all objects. Also fix the object + * registry. + */ +void HUF_fix_objects(void) { + dMY_CXT; + I32 i, len; + HE* ent; + AV* oblist = (AV*)sv_2mortal((SV*)newAV()); + hv_iterinit(MY_CXT.ob_reg); + while(ent = hv_iternext(MY_CXT.ob_reg)) + av_push(oblist, SvREFCNT_inc(hv_iterkeysv(ent))); + len = av_len(oblist); + for (i = 0; i <= len; ++i) { + SV* old_id = *av_fetch(oblist, i, 0); + SV* trigger = hv_delete_ent(MY_CXT.ob_reg, old_id, 0, 0); + SV* obj = SvRV(trigger); + MAGIC *mg; + + SV* new_id = HUF_OBJ_ID(obj); + + /* Replace cached object ID with this new one */ + for (mg = SvMAGIC(obj); mg; mg = mg->mg_moremagic) { + if ((mg->mg_type == PERL_MAGIC_ext) && + (mg->mg_private == HUF_IDCACHE) + ) { + mg->mg_obj = new_id; + } + } + + HUF_fix_trigger(trigger, new_id); + hv_store_ent(MY_CXT.ob_reg, new_id, SvREFCNT_inc(trigger), 0); + } +} + +/* test support (not needed for functionality) */ + +static SV* counter; +I32 HUF_inc_var(pTHX_ IV index, SV* which) { + sv_setiv(counter, 1 + SvIV(counter)); + return 0; +} + +MODULE = Hash::Util::FieldHash PACKAGE = Hash::Util::FieldHash + +BOOT: +{ + HUF_global(HUF_INIT); /* create variables */ +} + +int +_fieldhash(SV* href, int mode) +PROTOTYPE: $$ +CODE: + HV* field; + RETVAL = 0; + if (mode && + href && SvROK(href) && + (field = (HV*)SvRV(href)) && + SvTYPE(field) == SVt_PVHV + ) { + + HUF_add_uvar_magic( + SvRV(href), + HUF_mode_2func( mode), + NULL, + 0, + NULL + ); + RETVAL = HUF_get_status(field); + } +OUTPUT: + RETVAL + +void +id(SV* ref) +PROTOTYPE: $ +PPCODE: + if (SvROK(ref)) { + XPUSHs(HUF_obj_id(ref)); + } else { + XPUSHs(ref); + } + +SV* +id_2obj(SV* id) +PROTOTYPE: $ +CODE: + SV* obj = HUF_ask_trigger(id); + if (obj) { + RETVAL = newRV_inc(SvRV(obj)); + } else { + RETVAL = &PL_sv_undef; + } +OUTPUT: + RETVAL + +SV* +register(SV* obj, ...) +PROTOTYPE: $@ +CODE: + SV* trigger; + int i; + RETVAL = NULL; + if (!SvROK(obj)) { + Perl_die(aTHX_ "Attempt to register a non-ref"); + } else { + RETVAL = newRV_inc(SvRV(obj)); + } + trigger = HUF_get_trigger(obj, HUF_obj_id(obj)); + for (i = 1; i < items; ++ i) { + SV* field_ref = POPs; + if (SvROK(field_ref) && (SvTYPE(SvRV(field_ref)) == SVt_PVHV)) { + HUF_mark_field(trigger, SvRV(field_ref)); + } + } +OUTPUT: + RETVAL + +void +CLONE(char* classname) +CODE: + if (0 == strcmp(classname, "Hash::Util::FieldHash")) { + HUF_global(HUF_CLONE); + HUF_fix_objects(); + } + +void +_active_fields(SV* obj) +PPCODE: + if (SvROK(obj)) { + SV* ob_id = HUF_obj_id(obj); + SV* trigger = HUF_ask_trigger(ob_id); + if (trigger) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HE* ent; + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + HV* field = (HV*)SvRV(HeVAL(ent)); + if (hv_exists_ent(field, ob_id, 0)) + XPUSHs(sv_2mortal(newRV_inc((SV*)field))); + } + } + } + +void +_test_uvar_get(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + HUF_add_uvar_magic( + SvRV(svref), + &HUF_inc_var, + NULL, + 0, + SvRV(countref) + ); + } + +void +_test_uvar_set(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + counter = SvRV(countref); + HUF_add_uvar_magic( + SvRV(svref), + NULL, + &HUF_inc_var, + 0, + SvRV(countref) + ); + } + +void +_test_uvar_same(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + HUF_add_uvar_magic( + SvRV(svref), + &HUF_inc_var, + &HUF_inc_var, + 0, + NULL + ); + } + diff --git a/ext/Hash-Util-FieldHash/Makefile.PL b/ext/Hash-Util-FieldHash/Makefile.PL new file mode 100644 index 0000000000..4793ec912b --- /dev/null +++ b/ext/Hash-Util-FieldHash/Makefile.PL @@ -0,0 +1,22 @@ +use 5.009004; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Hash::Util::FieldHash', + VERSION_FROM => 'lib/Hash/Util/FieldHash.pm', # finds $VERSION + PREREQ_PM => {}, # e.g., Module::Name => 1.1 + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Hash/Util/FieldHash.pm', + AUTHOR => 'Anno Siegel <anno4000@zrz.tu-berlin.de>') : ()), + LIBS => [''], # e.g., '-lm' + DEFINE => '', # e.g., '-DHAVE_SOMETHING' + # Insert -I. if you add *.h files later: + INC => '', # e.g., '-I/usr/include/other' + # Un-comment this if you add C files to link with later: + # OBJECT => '$(O_FILES)', # link all the C files too + # Don't manify in core build + (grep( /^PERL_CORE=1$/, @ARGV) ? + (MAN3PODS => {}) : + ()), +); diff --git a/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm b/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm new file mode 100644 index 0000000000..25f244aba5 --- /dev/null +++ b/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm @@ -0,0 +1,860 @@ +package Hash::Util::FieldHash; + +use 5.009004; +use strict; +use warnings; +use Scalar::Util qw( reftype); + +our $VERSION = '1.03'; + +require Exporter; +our @ISA = qw(Exporter); +our %EXPORT_TAGS = ( + 'all' => [ qw( + fieldhash + fieldhashes + idhash + idhashes + id + id_2obj + register + )], +); +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +{ + require XSLoader; + my %ob_reg; # private object registry + sub _ob_reg { \ %ob_reg } + XSLoader::load('Hash::Util::FieldHash', $VERSION); +} + +sub fieldhash (\%) { + for ( shift ) { + return unless ref() && reftype( $_) eq 'HASH'; + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; + return; + } +} + +sub idhash (\%) { + for ( shift ) { + return unless ref() && reftype( $_) eq 'HASH'; + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; + return; + } +} + +sub fieldhashes { map &fieldhash( $_), @_ } +sub idhashes { map &idhash( $_), @_ } + +1; +__END__ + +=head1 NAME + +Hash::Util::FieldHash - Support for Inside-Out Classes + +=head1 SYNOPSIS + + ### Create fieldhashes + use Hash::Util qw(fieldhash fieldhashes); + + # Create a single field hash + fieldhash my %foo; + + # Create three at once... + fieldhashes \ my(%foo, %bar, %baz); + # ...or any number + fieldhashes @hashrefs; + + ### Create an idhash and register it for garbage collection + use Hash::Util::FieldHash qw(idhash register); + idhash my %name; + my $object = \ do { my $o }; + # register the idhash for garbage collection with $object + register($object, \ %name); + # the following entry will be deleted when $object goes out of scope + $name{$object} = 'John Doe'; + + ### Register an ordinary hash for garbage collection + use Hash::Util::FieldHash qw(id register); + my %name; + my $object = \ do { my $o }; + # register the hash %name for garbage collection of $object's id + register $object, \ %name; + # the following entry will be deleted when $object goes out of scope + $name{id $object} = 'John Doe'; + +=head1 FUNCTIONS + +C<Hash::Util::FieldHash> offers a number of functions in support of +L<The Inside-out Technique> of class construction. + +=over + +=item id + + id($obj) + +Returns the reference address of a reference $obj. If $obj is +not a reference, returns $obj. + +This function is a stand-in replacement for +L<Scalar::Util::refaddr|Scalar::Util/refaddr>, that is, it returns +the reference address of its argument as a numeric value. The only +difference is that C<refaddr()> returns C<undef> when given a +non-reference while C<id()> returns its argument unchanged. + +C<id()> also uses a caching technique that makes it faster when +the id of an object is requested often, but slower if it is needed +only once or twice. + +=item id_2obj + + $obj = id_2obj($id) + +If C<$id> is the id of a registered object (see L</register>), returns +the object, otherwise an undefined value. For registered objects this +is the inverse function of C<id()>. + +=item register + + register($obj) + register($obj, @hashrefs) + +In the first form, registers an object to work with for the function +C<id_2obj()>. In the second form, it additionally marks the given +hashrefs down for garbage collection. This means that when the object +goes out of scope, any entries in the given hashes under the key of +C<id($obj)> will be deleted from the hashes. + +It is a fatal error to register a non-reference $obj. Any non-hashrefs +among the following arguments are silently ignored. + +It is I<not> an error to register the same object multiple times with +varying sets of hashrefs. Any hashrefs that are not registered yet +will be added, others ignored. + +Registry also implies thread support. When a new thread is created, +all references are replaced with new ones, including all objects. +If a hash uses the reference address of an object as a key, that +connection would be broken. With a registered object, its id will +be updated in all hashes registered with it. + +=item idhash + + idhash my %hash + +Makes an idhash from the argument, which must be a hash. + +An I<idhash> works like a normal hash, except that it stringifies a +I<reference used as a key> differently. A reference is stringified +as if the C<id()> function had been invoked on it, that is, its +reference address in decimal is used as the key. + +=item idhashes + + idhashes \ my(%hash, %gnash, %trash) + idhashes \ @hashrefs + +Creates many idhashes from its hashref arguments. Returns those +arguments that could be converted or their number in scalar context. + +=item fieldhash + + fieldhash %hash; + +Creates a single fieldhash. The argument must be a hash. Returns +a reference to the given hash if successful, otherwise nothing. + +A I<fieldhash> is, in short, an idhash with auto-registry. When an +object (or, indeed, any reference) is used as a fieldhash key, the +fieldhash is automatically registered for garbage collection with +the object, as if C<register $obj, \ %fieldhash> had been called. + +=item fieldhashes + + fieldhashes @hashrefs; + +Creates any number of field hashes. Arguments must be hash references. +Returns the converted hashrefs in list context, their number in scalar +context. + +=back + +=head1 DESCRIPTION + +A word on terminology: I shall use the term I<field> for a scalar +piece of data that a class associates with an object. Other terms that +have been used for this concept are "object variable", "(object) property", +"(object) attribute" and more. Especially "attribute" has some currency +among Perl programmer, but that clashes with the C<attributes> pragma. The +term "field" also has some currency in this sense and doesn't seem +to conflict with other Perl terminology. + +In Perl, an object is a blessed reference. The standard way of associating +data with an object is to store the data inside the object's body, that is, +the piece of data pointed to by the reference. + +In consequence, if two or more classes want to access an object they +I<must> agree on the type of reference and also on the organization of +data within the object body. Failure to agree on the type results in +immediate death when the wrong method tries to access an object. Failure +to agree on data organization may lead to one class trampling over the +data of another. + +This object model leads to a tight coupling between subclasses. +If one class wants to inherit from another (and both classes access +object data), the classes must agree about implementation details. +Inheritance can only be used among classes that are maintained together, +in a single source or not. + +In particular, it is not possible to write general-purpose classes +in this technique, classes that can advertise themselves as "Put me +on your @ISA list and use my methods". If the other class has different +ideas about how the object body is used, there is trouble. + +For reference L<Name_hash> in L<Example 1> shows the standard implementation of +a simple class C<Name> in the well-known hash based way. It also demonstrates +the predictable failure to construct a common subclass C<NamedFile> +of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs). + +Thus, techniques are of interest that store object data I<not> in +the object body but some other place. + +=head2 The Inside-out Technique + +With I<inside-out> classes, each class declares a (typically lexical) +hash for each field it wants to use. The reference address of an +object is used as the hash key. By definition, the reference address +is unique to each object so this guarantees a place for each field that +is private to the class and unique to each object. See L<Name_id> in +L<Example 1> for a simple example. + +In comparison to the standard implementation where the object is a +hash and the fields correspond to hash keys, here the fields correspond +to hashes, and the object determines the hash key. Thus the hashes +appear to be turned I<inside out>. + +The body of an object is never examined by an inside-out class, only +its reference address is used. This allows for the body of an actual +object to be I<anything at all> while the object methods of the class +still work as designed. This is a key feature of inside-out classes. + +=head2 Problems of Inside-out + +Inside-out classes give us freedom of inheritance, but as usual there +is a price. + +Most obviously, there is the necessity of retrieving the reference +address of an object for each data access. It's a minor inconvenience, +but it does clutter the code. + +More important (and less obvious) is the necessity of garbage +collection. When a normal object dies, anything stored in the +object body is garbage-collected by perl. With inside-out objects, +Perl knows nothing about the data stored in field hashes by a class, +but these must be deleted when the object goes out of scope. Thus +the class must provide a C<DESTROY> method to take care of that. + +In the presence of multiple classes it can be non-trivial +to make sure that every relevant destructor is called for +every object. Perl calls the first one it finds on the +inheritance tree (if any) and that's it. + +A related issue is thread-safety. When a new thread is created, +the Perl interpreter is cloned, which implies that all reference +addresses in use will be replaced with new ones. Thus, if a class +tries to access a field of a cloned object its (cloned) data will +still be stored under the now invalid reference address of the +original in the parent thread. A general C<CLONE> method must +be provided to re-establish the association. + +=head2 Solutions + +C<Hash::Util::FieldHash> addresses these issues on several +levels. + +The C<id()> function is provided in addition to the +existing C<Scalar::Util::refaddr()>. Besides its short name +it can be a little faster under some circumstances (and a +bit slower under others). Benchmark if it matters. The +working of C<id()> also allows the use of the class name +as a I<generic object> as described L<further down|/"The Generic Object">. + +The C<id()> function is incorporated in I<id hashes> in the sense +that it is called automatically on every key that is used with +the hash. No explicit call is necessary. + +The problems of garbage collection and thread safety are both +addressed by the function C<register()>. It registers an object +together with any number of hashes. Registry means that when the +object dies, an entry in any of the hashes under the reference +address of this object will be deleted. This guarantees garbage +collection in these hashes. It also means that on thread +cloning the object's entries in registered hashes will be +replaced with updated entries whose key is the cloned object's +reference address. Thus the object-data association becomes +thread-safe. + +Object registry is best done when the object is initialized +for use with a class. That way, garbage collection and thread +safety are established for every object and every field that is +initialized. + +Finally, I<field hashes> incorporate all these functions in one +package. Besides automatically calling the C<id()> function +on every object used as a key, the object is registered with +the field hash on first use. Classes based on field hashes +are fully garbage-collected and thread safe without further +measures. + +=head2 More Problems + +Another problem that occurs with inside-out classes is serialization. +Since the object data is not in its usual place, standard routines +like C<Storable::freeze()>, C<Storable::thaw()> and +C<Data::Dumper::Dumper()> can't deal with it on their own. Both +C<Data::Dumper> and C<Storable> provide the necessary hooks to +make things work, but the functions or methods used by the hooks +must be provided by each inside-out class. + +A general solution to the serialization problem would require another +level of registry, one that that associates I<classes> and fields. +So far, the functions of C<Hash::Util::FieldHash> are unaware of +any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash> +doesn't address the serialization problems. + +=head2 The Generic Object + +Classes based on the C<id()> function (and hence classes based on +C<idhash()> and C<fieldhash()>) show a peculiar behavior in that +the class name can be used like an object. Specifically, methods +that set or read data associated with an object continue to work as +class methods, just as if the class name were an object, distinct from +all other objects, with its own data. This object may be called +the I<generic object> of the class. + +This works because field hashes respond to keys that are not references +like a normal hash would and use the string offered as the hash key. +Thus, if a method is called as a class method, the field hash is presented +with the class name instead of an object and blithely uses it as a key. +Since the keys of real objects are decimal numbers, there is no +conflict and the slot in the field hash can be used like any other. +The C<id()> function behaves correspondingly with respect to non-reference +arguments. + +Two possible uses (besides ignoring the property) come to mind. +A singleton class could be implemented this using the generic object. +If necessary, an C<init()> method could die or ignore calls with +actual objects (references), so only the generic object will ever exist. + +Another use of the generic object would be as a template. It is +a convenient place to store class-specific defaults for various +fields to be used in actual object initialization. + +Usually, the feature can be entirely ignored. Calling I<object +methods> as I<class methods> normally leads to an error and isn't used +routinely anywhere. It may be a problem that this error isn't +indicated by a class with a generic object. + +=head2 How to use Field Hashes + +Traditionally, the definition of an inside-out class contains a bare +block inside which a number of lexical hashes are declared and the +basic accessor methods defined, usually through C<Scalar::Util::refaddr>. +Further methods may be defined outside this block. There has to be +a DESTROY method and, for thread support, a CLONE method. + +When field hashes are used, the basic structure remains the same. +Each lexical hash will be made a field hash. The call to C<refaddr> +can be omitted from the accessor methods. DESTROY and CLONE methods +are not necessary. + +If you have an existing inside-out class, simply making all hashes +field hashes with no other change should make no difference. Through +the calls to C<refaddr> or equivalent, the field hashes never get to +see a reference and work like normal hashes. Your DESTROY (and +CLONE) methods are still needed. + +To make the field hashes kick in, it is easiest to redefine C<refaddr> +as + + sub refaddr { shift } + +instead of importing it from C<Scalar::Util>. It should now be possible +to disable DESTROY and CLONE. Note that while it isn't disabled, +DESTROY will be called before the garbage collection of field hashes, +so it will be invoked with a functional object and will continue to +function. + +It is not desirable to import the functions C<fieldhash> and/or +C<fieldhashes> into every class that is going to use them. They +are only used once to set up the class. When the class is up and running, +these functions serve no more purpose. + +If there are only a few field hashes to declare, it is simplest to + + use Hash::Util::FieldHash; + +early and call the functions qualified: + + Hash::Util::FieldHash::fieldhash my %foo; + +Otherwise, import the functions into a convenient package like +C<HUF> or, more general, C<Aux> + + { + package Aux; + use Hash::Util::FieldHash ':all'; + } + +and call + + Aux::fieldhash my %foo; + +as needed. + +=head2 Garbage-Collected Hashes + +Garbage collection in a field hash means that entries will "spontaneously" +disappear when the object that created them disappears. That must be +borne in mind, especially when looping over a field hash. If anything +you do inside the loop could cause an object to go out of scope, a +random key may be deleted from the hash you are looping over. That +can throw the loop iterator, so it's best to cache a consistent snapshot +of the keys and/or values and loop over that. You will still have to +check that a cached entry still exists when you get to it. + +Garbage collection can be confusing when keys are created in a field hash +from normal scalars as well as references. Once a reference is I<used> with +a field hash, the entry will be collected, even if it was later overwritten +with a plain scalar key (every positive integer is a candidate). This +is true even if the original entry was deleted in the meantime. In fact, +deletion from a field hash, and also a test for existence constitute +I<use> in this sense and create a liability to delete the entry when +the reference goes out of scope. If you happen to create an entry +with an identical key from a string or integer, that will be collected +instead. Thus, mixed use of references and plain scalars as field hash +keys is not entirely supported. + +=head1 EXAMPLES + +The examples show a very simple class that implements a I<name>, consisting +of a first and last name (no middle initial). The name class has four +methods: + +=over + +=item * C<init()> + +An object method that initializes the first and last name to its +two arguments. If called as a class method, C<init()> creates an +object in the given class and initializes that. + +=item * C<first()> + +Retrieve the first name + +=item * C<last()> + +Retrieve the last name + +=item * C<name()> + +Retrieve the full name, the first and last name joined by a blank. + +=back + +The examples show this class implemented with different levels of +support by C<Hash::Util::FieldHash>. All supported combinations +are shown. The difference between implementations is often quite +small. The implementations are: + +=over + +=item * C<Name_hash> + +A conventional (not inside-out) implementation where an object is +a hash that stores the field values, without support by +C<Hash::Util::FieldHash>. This implementation doesn't allow +arbitrary inheritance. + +=item * C<Name_id> + +Inside-out implementation based on the C<id()> function. It needs +a C<DESTROY> method. For thread support a C<CLONE> method (not shown) +would also be needed. Instead of C<Hash::Util::FieldHash::id()> the +function C<Scalar::Util::refaddr> could be used with very little +functional difference. This is the basic pattern of an inside-out +class. + +=item * C<Name_idhash> + +Idhash-based inside-out implementation. Like L<Name_id> it needs +a C<DESTROY> method and would need C<CLONE> for thread support. + +=item * C<Name_id_reg> + +Inside-out implementation based on the C<id()> function with explicit +object registry. No destructor is needed and objects are thread safe. + +=item * C<Name_idhash_reg> + +Idhash-based inside-out implementation with explicit object registry. +No destructor is needed and objects are thread safe. + +=item * C<Name_fieldhash> + +FieldHash-based inside-out implementation. Object registry happens +automatically. No destructor is needed and objects are thread safe. + +=back + +These examples are realized in the code below, which could be copied +to a file F<Example.pm>. + +=head2 Example 1 + + use strict; use warnings; + + { + package Name_hash; # standard implementation: the object is a hash + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless {}, $obj unless ref $obj; + $obj->{ first} = $first; + $obj->{ last} = $last; + $obj; + } + + sub first { shift()->{ first} } + sub last { shift()->{ last} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + + } + + { + package Name_id; + use Hash::Util::FieldHash qw(id); + + my (%first, %last); + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ id $obj} = $first; + $last{ id $obj} = $last; + $obj; + } + + sub first { $first{ id shift()} } + sub last { $last{ id shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + + sub DESTROY { + my $id = id shift; + delete $first{ $id}; + delete $last{ $id}; + } + + } + + { + package Name_idhash; + use Hash::Util::FieldHash; + + Hash::Util::FieldHash::idhashes( \ my (%first, %last) ); + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + + sub DESTROY { + my $n = shift; + delete $first{ $n}; + delete $last{ $n}; + } + + } + + { + package Name_id_reg; + use Hash::Util::FieldHash qw(id register); + + my (%first, %last); + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + register( $obj, \ (%first, %last) ); + $first{ id $obj} = $first; + $last{ id $obj} = $last; + $obj; + } + + sub first { $first{ id shift()} } + sub last { $last{ id shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + { + package Name_idhash_reg; + use Hash::Util::FieldHash qw(register); + + Hash::Util::FieldHash::idhashes \ my (%first, %last); + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + register( $obj, \ (%first, %last) ); + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + { + package Name_fieldhash; + use Hash::Util::FieldHash; + + Hash::Util::FieldHash::fieldhashes \ my (%first, %last); + + sub init { + my $obj = shift; + my ($first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + 1; + +To exercise the various implementations the script L<below|/"Example 2"> can +be used. + +It sets up a class C<Name> that is a mirror of one of the implementation +classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines +which implementation is run. + +The script first verifies the function of the C<Name> class. + +In the second step, the free inheritability of the implementation +(or lack thereof) is demonstrated. For this purpose it constructs +a class called C<NamedFile> which is a common subclass of C<Name> and +the standard class C<IO::File>. This puts inheritability to the test +because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile> +should behave like a file opened for reading and also support the C<name()> +method. This class juncture works with exception of the C<Name_hash> +implementation, where object initialization fails because of the +incompatibility of object bodies. + +=head2 Example 2 + + use strict; use warnings; $| = 1; + + use Example; + + { + package Name; + use base 'Name_id'; # define here which implementation to run + } + + + # Verify that the base package works + my $n = Name->init(qw(Albert Einstein)); + print $n->name, "\n"; + print "\n"; + + # Create a named file handle (See definition below) + my $nf = NamedFile->init(qw(/tmp/x Filomena File)); + # use as a file handle... + for ( 1 .. 3 ) { + my $l = <$nf>; + print "line $_: $l"; + } + # ...and as a Name object + print "...brought to you by ", $nf->name, "\n"; + exit; + + + # Definition of NamedFile + package NamedFile; + use base 'Name'; + use base 'IO::File'; + + sub init { + my $obj = shift; + my ($file, $first, $last) = @_; + $obj = $obj->IO::File::new() unless ref $obj; + $obj->open($file) or die "Can't read '$file': $!"; + $obj->Name::init($first, $last); + } + __END__ + + +=head1 GUTS + +To make C<Hash::Util::FieldHash> work, there were two changes to +F<perl> itself. C<PERL_MAGIC_uvar> was made avalaible for hashes, +and weak references now call uvar C<get> magic after a weakref has been +cleared. The first feature is used to make field hashes intercept +their keys upon access. The second one triggers garbage collection. + +=head2 The C<PERL_MAGIC_uvar> interface for hashes + +C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and +C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which +defines the interface. The call happens for hashes with "uvar" magic +if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> +fields. Hashes are unaffected if (and as long as) these fields +hold different values. + +Upon the call, the C<mg_obj> field will hold the hash key to be accessed. +Upon return, the C<SV*> value in C<mg_obj> will be used in place of the +original key in the hash access. The integer index value in the first +parameter will be the C<action> value from C<hv_fetch_common>, or -1 +if the call is from C<hv_delete_common>. + +This is a template for a function suitable for the C<uf_val> field in +a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> +fields are irrelevant. + + IV watch_key(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv = mg->mg_obj; + /* Do whatever you need to. If you decide to + supply a different key newkey, return it like this + */ + sv_2mortal(newkey); + mg->mg_obj = newkey; + return 0; + } + +=head2 Weakrefs call uvar magic + +When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> +magic is called after the reference has gone stale. This hook can be +used to trigger further garbage-collection activities associated with +the referenced object. + +=head2 How field hashes work + +The three features of key hashes, I<key replacement>, I<thread support>, +and I<garbage collection> are supported by a data structure called +the I<object registry>. This is a private hash where every object +is stored. An "object" in this sense is any reference (blessed or +unblessed) that has been used as a field hash key. + +The object registry keeps track of references that have been used as +field hash keys. The keys are generated from the reference address +like in a field hash (though the registry isn't a field hash). Each +value is a weak copy of the original reference, stored in an C<SV> that +is itself magical (C<PERL_MAGIC_uvar> again). The magical structure +holds a list (another hash, really) of field hashes that the reference +has been used with. When the weakref becomes stale, the magic is +activated and uses the list to delete the reference from all field +hashes it has been used with. After that, the entry is removed from +the object registry itself. Implicitly, that frees the magic structure +and the storage it has been using. + +Whenever a reference is used as a field hash key, the object registry +is checked and a new entry is made if necessary. The field hash is +then added to the list of fields this reference has used. + +The object registry is also used to repair a field hash after thread +cloning. Here, the entire object registry is processed. For every +reference found there, the field hashes it has used are visited and +the entry is updated. + +=head2 Internal function Hash::Util::FieldHash::_fieldhash + + # test if %hash is a field hash + my $result = _fieldhash \ %hash, 0; + + # make %hash a field hash + my $result = _fieldhash \ %hash, 1; + +C<_fieldhash> is the internal function used to create field hashes. +It takes two arguments, a hashref and a mode. If the mode is boolean +false, the hash is not changed but tested if it is a field hash. If +the hash isn't a field hash the return value is boolean false. If it +is, the return value indicates the mode of field hash. When called with +a boolean true mode, it turns the given hash into a field hash of this +mode, returning the mode of the created field hash. C<_fieldhash> +does not erase the given hash. + +Currently there is only one type of field hash, and only the boolean +value of the mode makes a difference, but that may change. + +=head1 AUTHOR + +Anno Siegel (ANNO) wrote the xs code and the changes in perl proper +Jerry Hedden (JDHEDDEN) made it faster + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2006-2007 by (Anno Siegel) + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.7 or, +at your option, any later version of Perl 5 you may have available. + +=cut diff --git a/ext/Hash-Util-FieldHash/t/01_load.t b/ext/Hash-Util-FieldHash/t/01_load.t new file mode 100644 index 0000000000..222ef9ae14 --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/01_load.t @@ -0,0 +1,57 @@ +#!perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use strict; use warnings; + +use Test::More tests => 8; + +# see that Hash::Util::FieldHash and Hash::Util load and export what +# they should + +BEGIN { + use_ok( 'Hash::Util'); + ok( defined( &Hash::Util::lock_keys), "Hash::Util::lock_keys found"); + ok( !defined( &Hash::Util::FieldHash::fieldhashes), + "Hash::Util::FieldHash not loaded", + ); +} + +package one; +use Test::More; +use Hash::Util qw( lock_keys); +BEGIN { + ok( defined( &lock_keys), "lock_keys imported from Hash::Util"); +} + +use Hash::Util qw( fieldhashes); +BEGIN { + ok( defined( &Hash::Util::FieldHash::fieldhashes), + "Hash::Util::FieldHash loaded", + ); + ok( defined( &fieldhashes), + "fieldhashes imported from Hash::Util", + ); +} + +package two; +use Test::More; +use Hash::Util::FieldHash qw( fieldhashes); +BEGIN { + ok( defined( &fieldhashes), + "fieldhashes imported from Hash::Util::FieldHash", + ); +} + +use Hash::Util::FieldHash qw( :all); +BEGIN { + ok( defined( &fieldhash), + "fieldhash imported from Hash::Util::FieldHash via :all", + ); +} + diff --git a/ext/Hash-Util-FieldHash/t/02_function.t b/ext/Hash-Util-FieldHash/t/02_function.t new file mode 100644 index 0000000000..738209c146 --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/02_function.t @@ -0,0 +1,336 @@ +#!perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use strict; use warnings; +use Test::More; +my $n_tests = 0; + +use Hash::Util::FieldHash qw( :all); +my $ob_reg = Hash::Util::FieldHash::_ob_reg; + +######################### + +my $fieldhash_mode = 2; + +# define ref types to use with some tests +my @test_types; +BEGIN { + # skipping CODE refs, they are differently scoped + @test_types = qw( SCALAR ARRAY HASH GLOB); +} + +### The id() function +{ + BEGIN { $n_tests += 4 } + my $ref = []; + is id( $ref), refaddr( $ref), "id is refaddr"; + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; + $h{ $ref} = (); + my ( $key) = keys %h; + is id( $ref), $key, "id is FieldHash key"; + my $scalar = 'string'; + is id( $scalar), $scalar, "string passes unchanged"; + $scalar = 1234; + is id( $scalar), $scalar, "number passes unchanged"; +} + +### idhash functionality +{ + BEGIN { $n_tests += 3 } + Hash::Util::FieldHash::idhash my %h; + my $ref = sub {}; + my $val = 123; + $h{ $ref} = $val; + my ( $key) = keys %h; + is $key, id( $ref), "idhash key correct"; + is $h{ $ref}, $val, "value retrieved through ref"; + is scalar keys %$ob_reg, 0, "no auto-registry in idhash"; +} + +### the register() and id_2obj functions +{ + BEGIN { $n_tests += 9 } + my $obj = {}; + my $id = id( $obj); + is id_2obj( $id), undef, "unregistered object not retrieved"; + is scalar keys %$ob_reg, 0, "object registry empty"; + is register( $obj), $obj, "object returned by register"; + is scalar keys %$ob_reg, 1, "object registry nonempty"; + is id_2obj( $id), $obj, "registered object retrieved"; + my %hash; + register( $obj, \ %hash); + $hash{ $id} = 123; + is scalar keys %hash, 1, "key present in registered hash"; + undef $obj; + is scalar keys %hash, 0, "key collected from registered hash"; + is scalar keys %$ob_reg, 0, "object registry empty again"; + eval { register( 1234) }; + like $@, qr/^Attempt to register/, "registering non-ref is fatal"; + +} + +### Object auto-registry + +BEGIN { $n_tests += 3 } +{ + { + my $obj = {}; + { + my $h = {}; + Hash::Util::FieldHash::_fieldhash $h, $fieldhash_mode; + $h->{ $obj} = 123; + is( keys %$ob_reg, 1, "one object registered"); + } + # field hash stays alive until $obj dies + is( keys %$ob_reg, 1, "object still registered"); + } + is( keys %$ob_reg, 0, "object unregistered"); +} + +### existence/retrieval/deletion +BEGIN { $n_tests += 6 } +{ + no warnings 'misc'; + my $val = 123; + Hash::Util::FieldHash::_fieldhash \ my( %h), $fieldhash_mode; + for ( [ str => 'abc'], [ ref => {}] ) { + my ( $keytype, $key) = @$_; + $h{ $key} = $val; + ok( exists $h{ $key}, "existence ($keytype)"); + is( $h{ $key}, $val, "retrieval ($keytype)"); + delete $h{ $key}; + is( keys %h, 0, "deletion ($keytype)"); + } +} + +### id-action (stringification independent of bless) +BEGIN { $n_tests += 5 } +# use Scalar::Util qw( refaddr); +{ + my( %f, %g, %h, %i); + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; + Hash::Util::FieldHash::_fieldhash \ %g, $fieldhash_mode; + my $val = 123; + my $key = []; + $f{ $key} = $val; + is( $f{ $key}, $val, "plain key set in field"); + my ( $id) = keys %f; + my $refaddr = refaddr($key); + is $id, $refaddr, "key is refaddr"; + bless $key; + is( $f{ $key}, $val, "access through blessed"); + $key = []; + $h{ $key} = $val; + is( $h{ $key}, $val, "plain key set in hash"); + bless $key; + isnt( $h{ $key}, $val, "no access through blessed"); +} + +# Garbage collection +BEGIN { $n_tests += 1 + 2*( 3*@test_types + 5) + 1 + 2 } + +{ + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; + $h{ []} = 123; + is( keys %h, 0, "blip"); +} + +for my $preload ( [], [ map {}, 1 .. 3] ) { + my $pre = @$preload ? ' (preloaded)' : ''; + my %f; + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; + my @preval = map "$_", @$preload; + @f{ @$preload} = @preval; + # Garbage collection separately + for my $type ( @test_types) { + { + my $ref = gen_ref( $type); + $f{ $ref} = $type; + my ( $val) = grep $_ eq $type, values %f; + is( $val, $type, "$type visible$pre"); + is( + keys %$ob_reg, + 1 + @$preload, + "$type obj registered$pre" + ); + } + is( keys %f, @$preload, "$type gone$pre"); + } + + # Garbage collection collectively + is( keys %$ob_reg, @$preload, "no objs remaining$pre"); + { + my @refs = map gen_ref( $_), @test_types; + @f{ @refs} = @test_types; + ok( + eq_set( [ values %f], [ @test_types, @preval]), + "all types present$pre", + ); + is( + keys %$ob_reg, + @test_types + @$preload, + "all types registered$pre", + ); + } + die "preload gone" unless defined $preload; + ok( eq_set( [ values %f], \ @preval), "all types gone$pre"); + is( keys %$ob_reg, @$preload, "all types unregistered$pre"); +} +is( keys %$ob_reg, 0, "preload gone after loop"); + +# autovivified key +{ + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; + my $ref = {}; + my $x = $h{ $ref}->[ 0]; + is keys %h, 1, "autovivified key present"; + undef $ref; + is keys %h, 0, "autovivified key collected"; +} + +# big key sets +BEGIN { $n_tests += 8 } +{ + my $size = 10_000; + my %f; + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; + { + my @refs = map [], 1 .. $size; + $f{ $_} = 1 for @refs; + is( keys %f, $size, "many keys singly"); + is( + keys %$ob_reg, + $size, + "many objects singly", + ); + } + is( keys %f, 0, "many keys singly gone"); + is( + keys %$ob_reg, + 0, + "many objects singly unregistered", + ); + + { + my @refs = map [], 1 .. $size; + @f{ @refs } = ( 1) x @refs; + is( keys %f, $size, "many keys at once"); + is( + keys %$ob_reg, + $size, + "many objects at once", + ); + } + is( keys %f, 0, "many keys at once gone"); + is( + keys %$ob_reg, + 0, + "many objects at once unregistered", + ); +} + +# many field hashes +BEGIN { $n_tests += 6 } +{ + my $n_fields = 1000; + my @fields = map {}, $n_fields; + Hash::Util::FieldHash::_fieldhash( $_, $fieldhash_mode) for @fields; + my @obs = map gen_ref( $_), @test_types; + my $n_obs = @obs; + for my $field ( @fields ) { + @{ $field }{ @obs} = map ref, @obs; + } + my $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "$n_obs entries in $n_fields fields"); + is( keys %$ob_reg, @obs, "$n_obs obs registered"); + pop @obs; + $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "one entry gone from $n_fields fields"); + is( keys %$ob_reg, @obs, "one ob unregistered"); + @obs = (); + $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "all entries gone from $n_fields fields"); + is( keys %$ob_reg, @obs, "all obs unregistered"); +} + + +# direct hash assignment +BEGIN { $n_tests += 4 } +{ + Hash::Util::FieldHash::_fieldhash( $_, $fieldhash_mode) for \ my( %f, %g, %h); + my $size = 6; + my @obs = map [], 1 .. $size; + @f{ @obs} = ( 1) x $size; + $g{ $_} = $f{ $_} for keys %f; # single assignment + %h = %f; # wholesale assignment + @obs = (); + is keys %$ob_reg, 0, "all keys collected"; + is keys %f, 0, "orig garbage-collected"; + is keys %g, 0, "single-copy garbage-collected"; + is keys %h, 0, "wholesale-copy garbage-collected"; +} + +{ + # prototypes in place? + my %proto_tab = ( + fieldhash => '\\%', + fieldhashes => '', + idhash => '\\%', + idhashes => '', + id => '$', + id_2obj => '$', + register => '$@', + ); + + + my @notfound = grep !exists $proto_tab{ $_} => + @Hash::Util::FieldHash::EXPORT_OK; + ok @notfound == 0, "All exports in table"; + is prototype( "Hash::Util::FieldHash::$_") || '', $proto_tab{ $_}, + "$_ has prototype ($proto_tab{ $_})" for + @Hash::Util::FieldHash::EXPORT_OK; + + BEGIN { $n_tests += 1 + @Hash::Util::FieldHash::EXPORT_OK } +} + +{ + BEGIN { $n_tests += 1 } + Hash::Util::FieldHash::_fieldhash \ my( %h), $fieldhash_mode; + bless \ %h, 'abc'; # this bus-errors with a certain bug + ok( 1, "no bus error on bless") +} + +BEGIN { plan tests => $n_tests } + +####################################################################### + +sub refaddr { + # silence possible warnings from hex() on 64bit systems + no warnings 'portable'; + + my $ref = shift; + hex +($ref =~ /\(0x([[:xdigit:]]+)\)$/)[ 0]; +} + +use Symbol qw( gensym); + +BEGIN { + my %gen = ( + SCALAR => sub { \ my $o }, + ARRAY => sub { [] }, + HASH => sub { {} }, + GLOB => sub { gensym }, + CODE => sub { sub {} }, + ); + + sub gen_ref { $gen{ shift()}->() } +} diff --git a/ext/Hash-Util-FieldHash/t/03_class.t b/ext/Hash-Util-FieldHash/t/03_class.t new file mode 100644 index 0000000000..f064fcc054 --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/03_class.t @@ -0,0 +1,118 @@ +#!perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use strict; use warnings; +use Test::More; +my $n_tests = 0; + +use Config; +BEGIN { $n_tests += 2 } +{ + my $p = Impostor->new( 'Donald Duck'); + is( $p->greeting, "Hi, I'm Donald Duck", "blank title"); + $p->assume_title( 'Mr'); + is( $p->greeting, "Hi, I'm Mr Donald Duck", "changed title"); +} + +# thread support? +BEGIN { $n_tests += 5 } +SKIP: { + skip "No thread support", 5 unless $Config{ usethreads}; + require threads; + treads->import if threads->can( 'import'); + + my $ans; + my $p = Impostor->new( 'Donald Duck'); + $ans = threads->create( sub { $p->greeting })->join; + is( $ans, "Hi, I'm Donald Duck", "thread: blank title"); + $p->assume_title( 'Mr'); + $ans = threads->create( sub { $p->greeting })->join; + is( $ans, "Hi, I'm Mr Donald Duck", "thread: changed title"); + $ans = threads->create( + sub { + $p->assume_title( 'Uncle'); + $p->greeting; + } + )->join; + is( $ans, "Hi, I'm Uncle Donald Duck", "thread: local change"); + is( $p->greeting, "Hi, I'm Mr Donald Duck", "thread: change is local"); + + # second generation thread + $ans = threads->create( + sub { + threads->create( sub { $p->greeting })->join; + } + )->join; + is( $ans, "Hi, I'm Mr Donald Duck", "double thread: got greeting"); +} + +BEGIN { plan tests => $n_tests } + +############################################################################ + +# must do this in BEGIN so that field hashes are declared before +# first use above + +BEGIN { + package CFF; + use Hash::Util::FieldHash qw( :all); + + package Person; + + { + CFF::fieldhash my %name; + CFF::fieldhash my %title; + + sub init { + my $p = shift; + $name{ $p} = shift || ''; + $title{ $p} = shift || ''; + $p; + } + + sub name { $name{ shift()} } + sub title { $title{ shift() } } + } + + sub new { + my $class = shift; + bless( \ my $x, $class)->init( @_); + } + + sub greeting { + my $p = shift; + my $greet = "Hi, I'm "; + $_ and $greet .= "$_ " for $p->title; + $greet .= $p->name; + $greet; + } + + package Impostor; + use base 'Person'; + + { + CFF::fieldhash my %assumed_title; + + sub init { + my $p = shift; + my ( $name, $title) = @_; + $p->Person::init( $name, $title); + $p->assume_title( $title); + $p; + } + + sub title { $assumed_title{ shift()} } + + sub assume_title { + my $p = shift; + $assumed_title{ $p} = shift || ''; + $p; + } + } +} diff --git a/ext/Hash-Util-FieldHash/t/04_thread.t b/ext/Hash-Util-FieldHash/t/04_thread.t new file mode 100644 index 0000000000..b74d2c8052 --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/04_thread.t @@ -0,0 +1,70 @@ +#!perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use strict; use warnings; +use Test::More; +my $n_tests; + +use Hash::Util::FieldHash qw( :all); +my $ob_reg = Hash::Util::FieldHash::_ob_reg; + +{ + my $n_basic; + BEGIN { + $n_basic = 6; # 6 tests per call of basic_func() + $n_tests += 5*$n_basic; + } + my %h; + fieldhash %h; + + sub basic_func { + my $level = shift; + + my @res; + my $push_is = sub { + my ( $hash, $should, $name) = @_; + push @res, [ scalar keys %$hash, $should, $name]; + }; + + my $obj = []; + $push_is->( \ %h, 0, "$level: initially clear"); + $push_is->( $ob_reg, 0, "$level: ob_reg initially clear"); + $h{ $obj} = 123; + $push_is->( \ %h, 1, "$level: one object"); + $push_is->( $ob_reg, 1, "$level: ob_reg one object"); + undef $obj; + $push_is->( \ %h, 0, "$level: garbage collected"); + $push_is->( $ob_reg, 0, "$level: ob_reg garbage collected"); + @res; + } + + &is( @$_) for basic_func( "home"); + + SKIP: { + require Config; + skip "No thread support", 3*$n_basic unless + $Config::Config{ usethreads}; + require threads; + my ( $t) = threads->create( \ &basic_func, "thread 1"); + &is( @$_) for $t->join; + + &is( @$_) for basic_func( "back home"); + + ( $t) = threads->create( sub { + my ( $t) = threads->create( \ &basic_func, "thread 2"); + $t->join; + }); + &is( @$_) for $t->join; + } + + &is( @$_) for basic_func( "back home again"); + +} + +BEGIN { plan tests => $n_tests } diff --git a/ext/Hash-Util-FieldHash/t/05_perlhook.t b/ext/Hash-Util-FieldHash/t/05_perlhook.t new file mode 100644 index 0000000000..3a4ebc8851 --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/05_perlhook.t @@ -0,0 +1,215 @@ +#!perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use strict; use warnings; +use Test::More; +my $n_tests; + +use Hash::Util::FieldHash; +use Scalar::Util qw( weaken); + +# The functions in Hash::Util::FieldHash +# _test_uvar_get, _test_uvar_get and _test_uvar_both + +# _test_uvar_get( $anyref, \ $counter) makes the referent of $anyref +# "uvar"-magical with get magic only. $counter is reset if the magic +# could be established. $counter will be incremented each time the +# magic "get" function is called. + +# _test_uvar_set does the same for "set" magic. _test_uvar_both +# sets both magic functions identically. Both use the same counter. + +# magical weak ref (patch to sv.c) +{ + my( $magref, $counter); + + $counter = 123; + Hash::Util::FieldHash::_test_uvar_set( \ $magref, \ $counter); + is( $counter, 0, "got magical scalar"); + + my $ref = []; + $magref = $ref; + is( $counter, 1, "store triggers magic"); + + weaken $magref; + is( $counter, 1, "weaken doesn't trigger magic"); + + { my $x = $magref } + is( $counter, 1, "read doesn't trigger magic"); + + undef $ref; + is( $counter, 2, "ref expiry triggers magic (weakref patch worked)"); + + is( $magref, undef, "weak ref works normally"); + + # same, but overwrite weakref before expiry + $counter = 0; + weaken( $magref = $ref = []); + is( $counter, 1, "setup for overwrite"); + + $magref = my $other_ref = []; + is( $counter, 2, "overwrite triggers"); + + undef $ref; + is( $counter, 2, "ref expiry doesn't trigger after overwrite"); + + is( $magref, $other_ref, "weak ref doesn't kill overwritten value"); + + BEGIN { $n_tests += 10 } +} + +# magical hash (patches to mg.c and hv.c) +{ + # the hook is only sensitive if the set function is NULL + my ( %h, $counter); + $counter = 123; + Hash::Util::FieldHash::_test_uvar_get( \ %h, \ $counter); + is( $counter, 0, "got magical hash"); + + %h = ( abc => 123); + is( $counter, 1, "list assign triggers"); + + + my $x = keys %h; + is( $counter, 1, "scalar keys doesn't trigger"); + is( $x, 1, "there is one key"); + + my (@x) = keys %h; + is( $counter, 1, "list keys doesn't trigger"); + is( "@x", "abc", "key is correct"); + + $x = values %h; + is( $counter, 1, "scalar values doesn't trigger"); + is( $x, 1, "the value is correct"); + + (@x) = values %h; + is( $counter, 1, "list values doesn't trigger"); + is( "@x", "123", "the value is correct"); + + $x = each %h; + is( $counter, 1, "scalar each doesn't trigger"); + is( $x, "abc", "the return is correct"); + + $x = each %h; + is( $counter, 1, "scalar each doesn't trigger"); + is( $x, undef, "the return is correct"); + + (@x) = each %h; + is( $counter, 1, "list each doesn't trigger"); + is( "@x", "abc 123", "the return is correct"); + + $x = %h; + is( $counter, 1, "hash in scalar context doesn't trigger"); + like( $x, qr!^\d+/\d+$!, "correct result"); + + (@x) = %h; + is( $counter, 1, "hash in list context doesn't trigger"); + is( "@x", "abc 123", "correct result"); + + + $h{ def} = 456; + is( $counter, 2, "lvalue assign triggers"); + + (@x) = sort %h; + is( $counter, 2, "hash in list context doesn't trigger"); + is( "@x", "123 456 abc def", "correct result"); + + exists $h{ def}; + is( $counter, 3, "good exists triggers"); + + exists $h{ xyz}; + is( $counter, 4, "bad exists triggers"); + + delete $h{ def}; + is( $counter, 5, "good delete triggers"); + + (@x) = sort %h; + is( $counter, 5, "hash in list context doesn't trigger"); + is( "@x", "123 abc", "correct result"); + + delete $h{ xyz}; + is( $counter, 6, "bad delete triggers"); + + (@x) = sort %h; + is( $counter, 6, "hash in list context doesn't trigger"); + is( "@x", "123 abc", "correct result"); + + $x = $h{ abc}; + is( $counter, 7, "good read triggers"); + + $x = $h{ xyz}; + is( $counter, 8, "bad read triggers"); + + (@x) = sort %h; + is( $counter, 8, "hash in list context doesn't trigger"); + is( "@x", "123 abc", "correct result"); + + + bless \ %h; + is( $counter, 8, "bless doesn't trigger"); + + bless \ %h, 'xyz'; + is( $counter, 8, "bless doesn't trigger"); + + # see that normal set magic doesn't trigger (identity condition) + my %i; + Hash::Util::FieldHash::_test_uvar_set( \ %i, \ $counter); + is( $counter, 0, "got magical hash"); + + %i = ( abc => 123); + $i{ def} = 456; + exists $i{ def}; + exists $i{ xyz}; + delete $i{ def}; + delete $i{ xyz}; + $x = $i{ abc}; + $x = $i{ xyz}; + $x = keys %i; + () = keys %i; + $x = values %i; + () = values %i; + $x = each %i; + () = each %i; + + is( $counter, 0, "normal set magic never triggers"); + + bless \ %i, 'abc'; + is( $counter, 1, "...except with bless"); + + # see that magic with both set and get doesn't trigger + $counter = 123; + my %j; + Hash::Util::FieldHash::_test_uvar_same( \ %j, \ $counter); + is( $counter, 0, "got magical hash"); + + %j = ( abc => 123); + $j{ def} = 456; + exists $j{ def}; + exists $j{ xyz}; + delete $j{ def}; + delete $j{ xyz}; + $x = $j{ abc}; + $x = $j{ xyz}; + $x = keys %j; + () = keys %j; + $x = values %j; + () = values %j; + $x = each %j; + () = each %j; + + is( $counter, 0, "get/set magic never triggers"); + + bless \ %j, 'abc'; + is( $counter, 1, "...except for bless"); + + BEGIN { $n_tests += 43 } +} + +BEGIN { plan tests => $n_tests } + diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t new file mode 100644 index 0000000000..29c2f4dafa --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/10_hash.t @@ -0,0 +1,118 @@ +#!./perl -w + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use Test::More; + +use strict; +use Hash::Util::FieldHash qw( :all); + +no warnings 'misc'; + +plan tests => 5; + +fieldhash my %h; + +ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); + +foreach (1..10) { + $h{"\0"x$_}++; +} + +ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); + +foreach (11..20) { + $h{"\0"x$_}++; +} + +ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); + + + + +# second part using an emulation of the PERL_HASH in perl, mounting an +# attack on a pre-populated hash. This is also useful if you need normal +# keys which don't contain \0 -- suitable for stashes + +use constant MASK_U32 => 2**32; +use constant HASH_SEED => 0; +use constant THRESHOLD => 14; +use constant START => "a"; + +# some initial hash data +fieldhash my %h2; +%h2 = map {$_ => 1} 'a'..'cc'; + +ok (!Internals::HvREHASH(%h2), + "starting with pre-populated non-pathological hash (rehash flag if off)"); + +my @keys = get_keys(\%h2); +$h2{$_}++ for @keys; +ok (Internals::HvREHASH(%h2), + scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); + +sub get_keys { + my $hr = shift; + + # the minimum of bits required to mount the attack on a hash + my $min_bits = log(THRESHOLD)/log(2); + + # if the hash has already been populated with a significant amount + # of entries the number of mask bits can be higher + my $keys = scalar keys %$hr; + my $bits = $keys ? log($keys)/log(2) : 0; + $bits = $min_bits if $min_bits > $bits; + + $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); + # need to add 2 bits to cover the internal split cases + $bits += 2; + my $mask = 2**$bits-1; + print "# using mask: $mask ($bits)\n"; + + my @keys; + my $s = START; + my $c = 0; + # get 2 keys on top of the THRESHOLD + my $hash; + while (@keys < THRESHOLD+2) { + # next if exists $hash->{$s}; + $hash = hash($s); + next unless ($hash & $mask) == 0; + $c++; + printf "# %2d: %5s, %10s\n", $c, $s, $hash; + push @keys, $s; + } continue { + $s++; + } + + return @keys; +} + + +# trying to provide the fastest equivalent of C macro's PERL_HASH in +# Perl - the main complication is that it uses U32 integer, which we +# can't do it perl, without doing some tricks +sub hash { + my $s = shift; + my @c = split //, $s; + my $u = HASH_SEED; + for (@c) { + # (A % M) + (B % M) == (A + B) % M + # This works because '+' produces a NV, which is big enough to hold + # the intermediate result. We only need the % before any "^" and "&" + # to get the result in the range for an I32. + # and << doesn't work on NV, so using 1 << 10 + $u += ord; + $u += $u * (1 << 10); $u %= MASK_U32; + $u ^= $u >> 6; + } + $u += $u << 3; $u %= MASK_U32; + $u ^= $u >> 11; $u %= MASK_U32; + $u += $u << 15; $u %= MASK_U32; + $u; +} diff --git a/ext/Hash-Util-FieldHash/t/11_hashassign.t b/ext/Hash-Util-FieldHash/t/11_hashassign.t new file mode 100644 index 0000000000..a42682fe7d --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/11_hashassign.t @@ -0,0 +1,321 @@ +#!./perl -w + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use Test::More; + +# use strict; +use Hash::Util::FieldHash qw( :all); +no warnings 'misc'; + +plan tests => 215; + +my @comma = ("key", "value"); + +# The peephole optimiser already knows that it should convert the string in +# $foo{string} into a shared hash key scalar. It might be worth making the +# tokeniser build the LHS of => as a shared hash key scalar too. +# And so there's the possiblility of it going wrong +# And going right on 8 bit but wrong on utf8 keys. +# And really we should also try utf8 literals in {} and => in utf8.t + +# Some of these tests are (effectively) duplicated in each.t +fieldhash my %comma; +%comma = @comma; +ok (keys %comma == 1, 'keys on comma hash'); +ok (values %comma == 1, 'values on comma hash'); +# defeat any tokeniser or optimiser cunning +my $key = 'ey'; +is ($comma{"k" . $key}, "value", 'is key present? (unoptimised)'); +# now with cunning: +is ($comma{key}, "value", 'is key present? (maybe optimised)'); +#tokeniser may treat => differently. +my @temp = (key=>undef); +is ($comma{$temp[0]}, "value", 'is key present? (using LHS of =>)'); + +@temp = %comma; +ok (eq_array (\@comma, \@temp), 'list from comma hash'); + +@temp = each %comma; +ok (eq_array (\@comma, \@temp), 'first each from comma hash'); +@temp = each %comma; +ok (eq_array ([], \@temp), 'last each from comma hash'); + +my %temp = %comma; + +ok (keys %temp == 1, 'keys on copy of comma hash'); +ok (values %temp == 1, 'values on copy of comma hash'); +is ($temp{'k' . $key}, "value", 'is key present? (unoptimised)'); +# now with cunning: +is ($temp{key}, "value", 'is key present? (maybe optimised)'); +@temp = (key=>undef); +is ($comma{$temp[0]}, "value", 'is key present? (using LHS of =>)'); + +@temp = %temp; +ok (eq_array (\@temp, \@temp), 'list from copy of comma hash'); + +@temp = each %temp; +ok (eq_array (\@temp, \@temp), 'first each from copy of comma hash'); +@temp = each %temp; +ok (eq_array ([], \@temp), 'last each from copy of comma hash'); + +my @arrow = (Key =>"Value"); + +fieldhash my %arrow; +%arrow = @arrow; +ok (keys %arrow == 1, 'keys on arrow hash'); +ok (values %arrow == 1, 'values on arrow hash'); +# defeat any tokeniser or optimiser cunning +$key = 'ey'; +is ($arrow{"K" . $key}, "Value", 'is key present? (unoptimised)'); +# now with cunning: +is ($arrow{Key}, "Value", 'is key present? (maybe optimised)'); +#tokeniser may treat => differently. +@temp = ('Key', undef); +is ($arrow{$temp[0]}, "Value", 'is key present? (using LHS of =>)'); + +@temp = %arrow; +ok (eq_array (\@arrow, \@temp), 'list from arrow hash'); + +@temp = each %arrow; +ok (eq_array (\@arrow, \@temp), 'first each from arrow hash'); +@temp = each %arrow; +ok (eq_array ([], \@temp), 'last each from arrow hash'); + +%temp = %arrow; + +ok (keys %temp == 1, 'keys on copy of arrow hash'); +ok (values %temp == 1, 'values on copy of arrow hash'); +is ($temp{'K' . $key}, "Value", 'is key present? (unoptimised)'); +# now with cunning: +is ($temp{Key}, "Value", 'is key present? (maybe optimised)'); +@temp = ('Key', undef); +is ($arrow{$temp[0]}, "Value", 'is key present? (using LHS of =>)'); + +@temp = %temp; +ok (eq_array (\@temp, \@temp), 'list from copy of arrow hash'); + +@temp = each %temp; +ok (eq_array (\@temp, \@temp), 'first each from copy of arrow hash'); +@temp = each %temp; +ok (eq_array ([], \@temp), 'last each from copy of arrow hash'); + +fieldhash my %direct; +fieldhash my %slow; +%direct = ('Camel', 2, 'Dromedary', 1); +$slow{Dromedary} = 1; +$slow{Camel} = 2; + +ok (eq_hash (\%slow, \%direct), "direct list assignment to hash"); +%direct = (Camel => 2, 'Dromedary' => 1); +ok (eq_hash (\%slow, \%direct), "direct list assignment to hash using =>"); + +$slow{Llama} = 0; # A llama is not a camel :-) +ok (!eq_hash (\%direct, \%slow), "different hashes should not be equal!"); + +my (%names, %names_copy); +fieldhash %names; +%names = ('$' => 'Scalar', '@' => 'Array', # Grr ' + '%', 'Hash', '&', 'Code'); +%names_copy = %names; +ok (eq_hash (\%names, \%names_copy), "check we can copy our hash"); + +sub in { + my %args = @_; + return eq_hash (\%names, \%args); +} + +ok (in (%names), "pass hash into a method"); + +sub in_method { + my $self = shift; + my %args = @_; + return eq_hash (\%names, \%args); +} + +ok (main->in_method (%names), "pass hash into a method"); + +sub out { + return %names; +} +%names_copy = out (); + +ok (eq_hash (\%names, \%names_copy), "pass hash from a subroutine"); + +sub out_method { + my $self = shift; + return %names; +} +%names_copy = main->out_method (); + +ok (eq_hash (\%names, \%names_copy), "pass hash from a method"); + +sub in_out { + my %args = @_; + return %args; +} +%names_copy = in_out (%names); + +ok (eq_hash (\%names, \%names_copy), "pass hash to and from a subroutine"); + +sub in_out_method { + my $self = shift; + my %args = @_; + return %args; +} +%names_copy = main->in_out_method (%names); + +ok (eq_hash (\%names, \%names_copy), "pass hash to and from a method"); + +my %names_copy2 = %names; +ok (eq_hash (\%names, \%names_copy2), "check copy worked"); + +# This should get ignored. +%names_copy = ('%', 'Associative Array', %names); + +ok (eq_hash (\%names, \%names_copy), "duplicates at the start of a list"); + +# This should not +%names_copy = ('*', 'Typeglob', %names); + +$names_copy2{'*'} = 'Typeglob'; +ok (eq_hash (\%names_copy, \%names_copy2), "duplicates at the end of a list"); + +%names_copy = ('%', 'Associative Array', '*', 'Endangered species', %names, + '*', 'Typeglob',); + +ok (eq_hash (\%names_copy, \%names_copy2), "duplicates at both ends"); + +# And now UTF8 + +foreach my $chr (60, 200, 600, 6000, 60000) { + # This little game may set a UTF8 flag internally. Or it may not. :-) + my ($key, $value) = (chr ($chr) . "\x{ABCD}", "$chr\x{ABCD}"); + chop ($key, $value); + my @utf8c = ($key, $value); + fieldhash my %utf8c; + %utf8c = @utf8c; + + ok (keys %utf8c == 1, 'keys on utf8 comma hash'); + ok (values %utf8c == 1, 'values on utf8 comma hash'); + # defeat any tokeniser or optimiser cunning + is ($utf8c{"" . $key}, $value, 'is key present? (unoptimised)'); + my $tempval = sprintf '$utf8c{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($utf8c{$temp[0]}, $value, 'is key present? (using LHS of $tempval)'); + + @temp = %utf8c; + ok (eq_array (\@utf8c, \@temp), 'list from utf8 comma hash'); + + @temp = each %utf8c; + ok (eq_array (\@utf8c, \@temp), 'first each from utf8 comma hash'); + @temp = each %utf8c; + ok (eq_array ([], \@temp), 'last each from utf8 comma hash'); + + %temp = %utf8c; + + ok (keys %temp == 1, 'keys on copy of utf8 comma hash'); + ok (values %temp == 1, 'values on copy of utf8 comma hash'); + is ($temp{"" . $key}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$temp{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($temp{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %temp; + ok (eq_array (\@temp, \@temp), 'list from copy of utf8 comma hash'); + + @temp = each %temp; + ok (eq_array (\@temp, \@temp), 'first each from copy of utf8 comma hash'); + @temp = each %temp; + ok (eq_array ([], \@temp), 'last each from copy of utf8 comma hash'); + + my $assign = sprintf '("\x{%x}" => "%d")', $chr, $chr; + print "# $assign\n"; + my (@utf8a) = eval $assign; + + fieldhash my %utf8a; + %utf8a = @utf8a; + ok (keys %utf8a == 1, 'keys on utf8 arrow hash'); + ok (values %utf8a == 1, 'values on utf8 arrow hash'); + # defeat any tokeniser or optimiser cunning + is ($utf8a{$key . ""}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$utf8a{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($utf8a{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %utf8a; + ok (eq_array (\@utf8a, \@temp), 'list from utf8 arrow hash'); + + @temp = each %utf8a; + ok (eq_array (\@utf8a, \@temp), 'first each from utf8 arrow hash'); + @temp = each %utf8a; + ok (eq_array ([], \@temp), 'last each from utf8 arrow hash'); + + %temp = %utf8a; + + ok (keys %temp == 1, 'keys on copy of utf8 arrow hash'); + ok (values %temp == 1, 'values on copy of utf8 arrow hash'); + is ($temp{'' . $key}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$temp{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($temp{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %temp; + ok (eq_array (\@temp, \@temp), 'list from copy of utf8 arrow hash'); + + @temp = each %temp; + ok (eq_array (\@temp, \@temp), 'first each from copy of utf8 arrow hash'); + @temp = each %temp; + ok (eq_array ([], \@temp), 'last each from copy of utf8 arrow hash'); + +} + +# now some tests for hash assignment in scalar and list context with +# duplicate keys [perl #24380] +{ + my %h; my $x; my $ar; + fieldhash %h; + is( (join ':', %h = (1) x 8), '1:1', + 'hash assignment in list context removes duplicates' ); + is( scalar( %h = (1,2,1,3,1,4,1,5) ), 2, + 'hash assignment in scalar context' ); + is( scalar( ($x,%h) = (0,1,2,1,3,1,4,1,5) ), 3, + 'scalar + hash assignment in scalar context' ); + $ar = [ %h = (1,2,1,3,1,4,1,5) ]; + is( $#$ar, 1, 'hash assignment in list context' ); + is( "@$ar", "1 5", '...gets the last values' ); + $ar = [ ($x,%h) = (0,1,2,1,3,1,4,1,5) ]; + is( $#$ar, 2, 'scalar + hash assignment in list context' ); + is( "@$ar", "0 1 5", '...gets the last values' ); +} + +# test stringification of keys +{ + no warnings 'once', 'misc'; + my @types = qw( SCALAR ARRAY HASH CODE GLOB); + my @refs = ( \ do { my $x }, [], {}, sub {}, \ *x); + my(%h, %expect); + fieldhash %h; + @h{@refs} = @types; + @expect{map "$_", @refs} = @types; + ok (!eq_hash(\%h, \%expect), 'unblessed ref stringification different'); + + bless $_ for @refs; + %h = (); %expect = (); + @h{@refs} = @types; + @expect{map "$_", @refs} = @types; + ok (!eq_hash(\%h, \%expect), 'blessed ref stringification different'); +} diff --git a/ext/Hash-Util-FieldHash/t/12_hashwarn.t b/ext/Hash-Util-FieldHash/t/12_hashwarn.t new file mode 100644 index 0000000000..9d4474df3c --- /dev/null +++ b/ext/Hash-Util-FieldHash/t/12_hashwarn.t @@ -0,0 +1,62 @@ +#!./perl + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir 't' if -d 't'; + @INC = '../lib'; + } +} + +use Test::More; + +plan( tests => 12 ); + +use strict; +use warnings; +use Hash::Util::FieldHash qw( :all); + +use vars qw{ @warnings }; + +BEGIN { + $SIG{'__WARN__'} = sub { push @warnings, @_ }; + $| = 1; +} + +my $fail_odd = 'Odd number of elements in hash assignment at '; +my $fail_odd_anon = 'Odd number of elements in anonymous hash at '; +my $fail_ref = 'Reference found where even-sized list expected at '; +my $fail_not_hr = 'Not a HASH reference at '; + +{ + @warnings = (); + fieldhash my %hash; + %hash = (1..3); + cmp_ok(scalar(@warnings),'==',1,'odd count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'odd msg'); + + @warnings = (); + %hash = 1; + cmp_ok(scalar(@warnings),'==',1,'scalar count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'scalar msg'); + + @warnings = (); + %hash = { 1..3 }; + cmp_ok(scalar(@warnings),'==',2,'odd hashref count'); + cmp_ok(substr($warnings[0],0,length($fail_odd_anon)),'eq',$fail_odd_anon,'odd hashref msg 1'); + cmp_ok(substr($warnings[1],0,length($fail_ref)),'eq',$fail_ref,'odd hashref msg 2'); + + @warnings = (); + %hash = [ 1..3 ]; + cmp_ok(scalar(@warnings),'==',1,'arrayref count'); + cmp_ok(substr($warnings[0],0,length($fail_ref)),'eq',$fail_ref,'arrayref msg'); + + @warnings = (); + %hash = sub { print "fenice" }; + cmp_ok(scalar(@warnings),'==',1,'coderef count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'coderef msg'); + + @warnings = (); + $_ = { 1..10 }; + cmp_ok(scalar(@warnings),'==',0,'hashref assign'); + +} |