author | unknown <ram@mysql.r18.ru> | 2002-10-30 15:57:05 +0400
committer | unknown <ram@mysql.r18.ru> | 2002-10-30 15:57:05 +0400
commit | 155e78f014de1a2e259ae5119f4621fbb210a784 (patch)
tree | 6881a3cca88bea0bb9eeffd5aae34be437152786 /bdb/hash
parent | b8798d25ab71436bf690ee8ae48285a655c5487e (diff)
download | mariadb-git-155e78f014de1a2e259ae5119f4621fbb210a784.tar.gz
BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f:
Delete: bdb/build_vxworks/ex_access/ex_access.wpj
BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6:
Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj
BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd:
Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj
BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74:
Delete: bdb/build_vxworks/ex_env/ex_env.wpj
BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d:
Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj
BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41:
Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj
BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682:
Delete: bdb/build_win32/db_buildall.dsp
BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed:
Delete: bdb/cxx/cxx_app.cpp
BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952:
Delete: bdb/cxx/cxx_log.cpp
BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556:
Delete: bdb/cxx/cxx_table.cpp
BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd:
Delete: bdb/cxx/namemap.txt
BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373:
Delete: bdb/db/Design.fileop
BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef:
Delete: bdb/db185/db185_int.h
BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5:
Delete: bdb/dist/acconfig.h
BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1:
Delete: bdb/dist/aclocal/mutex.m4
BitKeeper/deleted/.del-options.m4~b9d0ca637213750a:
Delete: bdb/dist/aclocal/options.m4
BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30:
Delete: bdb/dist/aclocal/programs.m4
BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db:
Delete: bdb/dist/aclocal/tcl.m4
BitKeeper/deleted/.del-types.m4~59cae158c9a32cff:
Delete: bdb/dist/aclocal/types.m4
BitKeeper/deleted/.del-script~d38f6d3a4f159cb4:
Delete: bdb/dist/build/script
BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c:
Delete: bdb/dist/configure.in
BitKeeper/deleted/.del-ltconfig~66bbd007d8024af:
Delete: bdb/dist/ltconfig
BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a:
Delete: bdb/dist/rec_ctemp
BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f:
Delete: bdb/dist/s_tcl
BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15:
Delete: bdb/dist/template/.IGNORE_ME
BitKeeper/deleted/.del-btree.h~179f2aefec1753d:
Delete: bdb/include/btree.h
BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8:
Delete: bdb/include/cxx_int.h
BitKeeper/deleted/.del-db.src~6b433ae615b16a8d:
Delete: bdb/include/db.src
BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c:
Delete: bdb/include/db_185.h
BitKeeper/deleted/.del-db_am.h~a714912b6b75932f:
Delete: bdb/include/db_am.h
BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9:
Delete: bdb/include/db_cxx.h
BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904:
Delete: bdb/include/db_dispatch.h
BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7:
Delete: bdb/include/db_int.src
BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a:
Delete: bdb/include/db_join.h
BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc:
Delete: bdb/include/db_page.h
BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab:
Delete: bdb/include/db_server_int.h
BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3:
Delete: bdb/include/db_shash.h
BitKeeper/deleted/.del-db_swap.h~1e60887550864a59:
Delete: bdb/include/db_swap.h
BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d:
Delete: bdb/include/db_upgrade.h
BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e:
Delete: bdb/include/db_verify.h
BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc:
Delete: bdb/include/debug.h
BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb:
Delete: bdb/include/hash.h
BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f:
Delete: bdb/include/lock.h
BitKeeper/deleted/.del-log.h~ff20184238e35e4d:
Delete: bdb/include/log.h
BitKeeper/deleted/.del-mp.h~7e317597622f3411:
Delete: bdb/include/mp.h
BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137:
Delete: bdb/include/mutex.h
BitKeeper/deleted/.del-os.h~91867cc8757cd0e3:
Delete: bdb/include/os.h
BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be:
Delete: bdb/include/os_jump.h
BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597:
Delete: bdb/include/qam.h
BitKeeper/deleted/.del-queue.h~4c72c0826c123d5:
Delete: bdb/include/queue.h
BitKeeper/deleted/.del-region.h~513fe04d977ca0fc:
Delete: bdb/include/region.h
BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36:
Delete: bdb/include/shqueue.h
BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f:
Delete: bdb/include/tcl_db.h
BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4:
Delete: bdb/include/txn.h
BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a:
Delete: bdb/include/xa.h
BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023:
Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java
BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d:
Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java
BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b:
Delete: bdb/lock/lock_conflict.c
BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2:
Delete: bdb/log/log.src
BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f:
Delete: bdb/log/log_findckp.c
BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297:
Delete: bdb/log/log_rec.c
BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15:
Delete: bdb/log/log_register.c
BitKeeper/deleted/.del-Design~b3d0f179f2767b:
Delete: bdb/mp/Design
BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c:
Delete: bdb/os/os_finit.c
BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924:
Delete: bdb/os_vxworks/os_abs.c
BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122:
Delete: bdb/os_vxworks/os_finit.c
BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3:
Delete: bdb/os_vxworks/os_map.c
BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad:
Delete: bdb/os_win32/os_finit.c
BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d:
Delete: bdb/patches/log-corruption.patch
BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm
BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod
BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm
BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P
BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs
BitKeeper/deleted/.del-Changes~729c1891efa60de9:
Delete: bdb/perl.BerkeleyDB/Changes
BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0:
Delete: bdb/perl.BerkeleyDB/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a:
Delete: bdb/perl.BerkeleyDB/Makefile.PL
BitKeeper/deleted/.del-README~5f2f579b1a241407:
Delete: bdb/perl.BerkeleyDB/README
BitKeeper/deleted/.del-Todo~dca3c66c193adda9:
Delete: bdb/perl.BerkeleyDB/Todo
BitKeeper/deleted/.del-config.in~ae81681e450e0999:
Delete: bdb/perl.BerkeleyDB/config.in
BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e:
Delete: bdb/perl.BerkeleyDB/dbinfo
BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e:
Delete: bdb/perl.BerkeleyDB/mkconsts
BitKeeper/deleted/.del-mkpod~182c0ca54e439afb:
Delete: bdb/perl.BerkeleyDB/mkpod
BitKeeper/deleted/.del-5.004~e008cb5a48805543:
Delete: bdb/perl.BerkeleyDB/patches/5.004
BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8:
Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl
BitKeeper/deleted/.del-solaris.pl~6771e7182394e152:
Delete: bdb/perl.BerkeleyDB/hints/solaris.pl
BitKeeper/deleted/.del-typemap~783b8f5295b05f3d:
Delete: bdb/perl.BerkeleyDB/typemap
BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc:
Delete: bdb/perl.BerkeleyDB/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6:
Delete: bdb/perl.BerkeleyDB/patches/5.004_02
BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f:
Delete: bdb/perl.BerkeleyDB/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~e326cb51af09d154:
Delete: bdb/perl.BerkeleyDB/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe:
Delete: bdb/perl.BerkeleyDB/patches/5.004_05
BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b:
Delete: bdb/perl.BerkeleyDB/patches/5.005
BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e:
Delete: bdb/perl.BerkeleyDB/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb:
Delete: bdb/perl.BerkeleyDB/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a:
Delete: bdb/perl.BerkeleyDB/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22:
Delete: bdb/perl.BerkeleyDB/patches/5.6.0
BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406:
Delete: bdb/perl.BerkeleyDB/t/btree.t
BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2:
Delete: bdb/perl.BerkeleyDB/t/db-3.0.t
BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018:
Delete: bdb/perl.BerkeleyDB/t/db-3.1.t
BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf:
Delete: bdb/perl.BerkeleyDB/t/db-3.2.t
BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd:
Delete: bdb/perl.BerkeleyDB/t/destroy.t
BitKeeper/deleted/.del-env.t~a8604a4499c4bd07:
Delete: bdb/perl.BerkeleyDB/t/env.t
BitKeeper/deleted/.del-examples.t~2571b77c3cc75574:
Delete: bdb/perl.BerkeleyDB/t/examples.t
BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88:
Delete: bdb/perl.BerkeleyDB/t/examples.t.T
BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d:
Delete: bdb/perl.BerkeleyDB/t/examples3.t.T
BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83:
Delete: bdb/perl.BerkeleyDB/t/examples3.t
BitKeeper/deleted/.del-filter.t~f87b045c1b708637:
Delete: bdb/perl.BerkeleyDB/t/filter.t
BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3:
Delete: bdb/perl.BerkeleyDB/t/hash.t
BitKeeper/deleted/.del-join.t~29fc39f74a83ca22:
Delete: bdb/perl.BerkeleyDB/t/join.t
BitKeeper/deleted/.del-mldbm.t~31f5015341eea040:
Delete: bdb/perl.BerkeleyDB/t/mldbm.t
BitKeeper/deleted/.del-queue.t~8f338034ce44a641:
Delete: bdb/perl.BerkeleyDB/t/queue.t
BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e:
Delete: bdb/perl.BerkeleyDB/t/recno.t
BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d:
Delete: bdb/perl.BerkeleyDB/t/strict.t
BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603:
Delete: bdb/perl.BerkeleyDB/t/subdb.t
BitKeeper/deleted/.del-txn.t~65033b8558ae1216:
Delete: bdb/perl.BerkeleyDB/t/txn.t
BitKeeper/deleted/.del-unknown.t~f3710458682665e1:
Delete: bdb/perl.BerkeleyDB/t/unknown.t
BitKeeper/deleted/.del-Changes~436f74a5c414c65b:
Delete: bdb/perl.DB_File/Changes
BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82:
Delete: bdb/perl.DB_File/DB_File.pm
BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8:
Delete: bdb/perl.DB_File/DB_File.xs
BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069:
Delete: bdb/perl.DB_File/DB_File_BS
BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac:
Delete: bdb/perl.DB_File/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38:
Delete: bdb/perl.DB_File/Makefile.PL
BitKeeper/deleted/.del-README~77e924a5a9bae6b3:
Delete: bdb/perl.DB_File/README
BitKeeper/deleted/.del-config.in~ab4c2792b86a810b:
Delete: bdb/perl.DB_File/config.in
BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb:
Delete: bdb/perl.DB_File/dbinfo
BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9:
Delete: bdb/perl.DB_File/hints/dynixptx.pl
BitKeeper/deleted/.del-typemap~55cffb3288a9e587:
Delete: bdb/perl.DB_File/typemap
BitKeeper/deleted/.del-version.c~a4df0e646f8b3975:
Delete: bdb/perl.DB_File/version.c
BitKeeper/deleted/.del-5.004_01~d6830d0082702af7:
Delete: bdb/perl.DB_File/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~78b082dc80c91031:
Delete: bdb/perl.DB_File/patches/5.004_02
BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b:
Delete: bdb/perl.DB_File/patches/5.004
BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe:
Delete: bdb/perl.DB_File/hints/sco.pl
BitKeeper/deleted/.del-5.004_03~33f274648b160d95:
Delete: bdb/perl.DB_File/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a:
Delete: bdb/perl.DB_File/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142:
Delete: bdb/perl.DB_File/patches/5.004_05
BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951:
Delete: bdb/perl.DB_File/patches/5.005
BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa:
Delete: bdb/perl.DB_File/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522:
Delete: bdb/perl.DB_File/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea:
Delete: bdb/perl.DB_File/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85:
Delete: bdb/perl.DB_File/patches/5.6.0
BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3:
Delete: bdb/perl.DB_File/t/db-btree.t
BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2:
Delete: bdb/perl.DB_File/t/db-hash.t
BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50:
Delete: bdb/perl.DB_File/t/db-recno.t
BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2:
Delete: bdb/rpc_server/db_server.sed
BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4:
Delete: bdb/rpc_server/db_server_proc.c
BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607:
Delete: bdb/rpc_server/db_server_svc.sed
BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda:
Delete: bdb/rpc_server/db_server_util.c
BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7:
Delete: bdb/test/log.tcl
BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c:
Delete: bdb/test/mpool.tcl
BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565:
Delete: bdb/test/mutex.tcl
BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e:
Delete: bdb/test/txn.tcl
BitKeeper/deleted/.del-README~e800a12a5392010a:
Delete: bdb/test/upgrade/README
BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98:
Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl
BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b:
Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
Diffstat (limited to 'bdb/hash')
-rw-r--r-- | bdb/hash/hash.c | 1386
-rw-r--r-- | bdb/hash/hash.src | 219
-rw-r--r-- | bdb/hash/hash_conv.c | 32
-rw-r--r-- | bdb/hash/hash_dup.c | 396
-rw-r--r-- | bdb/hash/hash_func.c | 11
-rw-r--r-- | bdb/hash/hash_meta.c | 56
-rw-r--r-- | bdb/hash/hash_method.c | 12
-rw-r--r-- | bdb/hash/hash_open.c | 558
-rw-r--r-- | bdb/hash/hash_page.c | 799
-rw-r--r-- | bdb/hash/hash_rec.c | 500
-rw-r--r-- | bdb/hash/hash_reclaim.c | 59
-rw-r--r-- | bdb/hash/hash_stat.c | 137
-rw-r--r-- | bdb/hash/hash_upgrade.c | 23
-rw-r--r-- | bdb/hash/hash_verify.c | 238
14 files changed, 2673 insertions, 1753 deletions
diff --git a/bdb/hash/hash.c b/bdb/hash/hash.c index e96fd4898f0..2f972a3238d 100644 --- a/bdb/hash/hash.c +++ b/bdb/hash/hash.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $"; +static const char revid[] = "$Id: hash.c,v 11.166 2002/08/06 06:11:25 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -54,446 +54,70 @@ static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $ #endif #include "db_int.h" -#include "db_page.h" -#include "db_am.h" -#include "db_ext.h" -#include "db_shash.h" -#include "db_swap.h" -#include "hash.h" -#include "btree.h" -#include "log.h" -#include "lock.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" +static int __ham_bulk __P((DBC *, DBT *, u_int32_t)); static int __ham_c_close __P((DBC *, db_pgno_t, int *)); static int __ham_c_del __P((DBC *)); static int __ham_c_destroy __P((DBC *)); static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __ham_c_writelock __P((DBC *)); -static int __ham_del_dups __P((DBC *, DBT *)); -static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); static int __ham_expand_table __P((DBC *)); -static int __ham_init_htab __P((DBC *, - const char *, db_pgno_t, u_int32_t, u_int32_t)); static int __ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); static int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); /* - * __ham_metachk -- + * __ham_quick_delete -- + * When performing a DB->del operation that does not involve secondary + * indices and is not removing an off-page duplicate tree, we can + * speed things up substantially by removing the entire duplicate + * set, if any is present, in one operation, rather than by conjuring + * up and deleting each of the items individually. (All are stored + * in one big HKEYDATA structure.) We don't bother to distinguish + * on-page duplicate sets from single, non-dup items; they're deleted + * in exactly the same way. * - * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); - */ -int -__ham_metachk(dbp, name, hashm) - DB *dbp; - const char *name; - HMETA *hashm; -{ - DB_ENV *dbenv; - u_int32_t vers; - int ret; - - dbenv = dbp->dbenv; - - /* - * At this point, all we know is that the magic number is for a Hash. - * Check the version, the database may be out of date. - */ - vers = hashm->dbmeta.version; - if (F_ISSET(dbp, DB_AM_SWAP)) - M_32_SWAP(vers); - switch (vers) { - case 4: - case 5: - case 6: - __db_err(dbenv, - "%s: hash version %lu requires a version upgrade", - name, (u_long)vers); - return (DB_OLD_VERSION); - case 7: - break; - default: - __db_err(dbenv, - "%s: unsupported hash version: %lu", name, (u_long)vers); - return (EINVAL); - } - - /* Swap the page if we need to. */ - if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) - return (ret); - - /* Check the type. 
*/ - if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) - return (EINVAL); - dbp->type = DB_HASH; - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - /* - * Check application info against metadata info, and set info, flags, - * and type based on metadata info. - */ - if ((ret = __db_fchk(dbenv, - "DB->open", hashm->dbmeta.flags, - DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) - return (ret); - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - else - if (F_ISSET(dbp, DB_AM_DUP)) { - __db_err(dbenv, - "%s: DB_DUP specified to open method but not set in database", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - else - if (F_ISSET(dbp, DB_AM_SUBDB)) { - __db_err(dbenv, - "%s: multiple databases specified but not supported in file", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { - if (dbp->dup_compare == NULL) - dbp->dup_compare = __bam_defcmp; - } else - if (dbp->dup_compare != NULL) { - __db_err(dbenv, - "%s: duplicate sort function specified but not set in database", - name); - return (EINVAL); - } - - /* Set the page size. */ - dbp->pgsize = hashm->dbmeta.pagesize; - - /* Copy the file's ID. */ - memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); - - return (0); -} - -/* - * __ham_open -- + * This function is called by __db_delete when the appropriate + * conditions are met, and it performs the delete in the optimized way. * - * PUBLIC: int __ham_open __P((DB *, const char *, db_pgno_t, u_int32_t)); + * The cursor should be set to the first item in the duplicate + * set, or to the sole key/data pair when the key does not have a + * duplicate set, before the function is called. + * + * PUBLIC: int __ham_quick_delete __P((DBC *)); */ int -__ham_open(dbp, name, base_pgno, flags) - DB *dbp; - const char *name; - db_pgno_t base_pgno; - u_int32_t flags; -{ - DB_ENV *dbenv; - DBC *dbc; - HASH_CURSOR *hcp; - HASH *hashp; - int need_sync, ret, t_ret; - - dbc = NULL; - dbenv = dbp->dbenv; - need_sync = 0; - - /* Initialize the remaining fields/methods of the DB. */ - dbp->del = __ham_delete; - dbp->stat = __ham_stat; - - /* - * Get a cursor. If DB_CREATE is specified, we may be creating - * pages, and to do that safely in CDB we need a write cursor. - * In STD_LOCKING mode, we'll synchronize using the meta page - * lock instead. - */ - if ((ret = dbp->cursor(dbp, - dbp->open_txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? - DB_WRITECURSOR : 0)) != 0) - return (ret); - - hcp = (HASH_CURSOR *)dbc->internal; - hashp = dbp->h_internal; - hashp->meta_pgno = base_pgno; - if ((ret = __ham_get_meta(dbc)) != 0) - goto err1; - - /* - * If this is a new file, initialize it, and put it back dirty. - * - * Initialize the hdr structure. - */ - if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { - /* File exists, verify the data in the header. */ - if (hashp->h_hash == NULL) - hashp->h_hash = hcp->hdr->dbmeta.version < 5 - ? __ham_func4 : __ham_func5; - if (!F_ISSET(dbp, DB_RDONLY) && - hashp->h_hash(dbp, - CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) { - __db_err(dbp->dbenv, - "hash: incompatible hash function"); - ret = EINVAL; - goto err2; - } - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) - F_SET(dbp, DB_AM_DUPSORT); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - } else if (!IS_RECOVERING(dbenv)) { - /* - * File does not exist, we must initialize the header. 
If - * locking is enabled that means getting a write lock first. - * During recovery the meta page will be in the log. - */ - dbc->lock.pgno = base_pgno; - - if (STD_LOCKING(dbc) && - ((ret = lock_put(dbenv, &hcp->hlock)) != 0 || - (ret = lock_get(dbenv, dbc->locker, - DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, - &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) - goto err2; - else if (CDB_LOCKING(dbp->dbenv)) { - DB_ASSERT(LF_ISSET(DB_CREATE)); - if ((ret = lock_get(dbenv, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - goto err2; - } - if ((ret = __ham_init_htab(dbc, name, - base_pgno, hashp->h_nelem, hashp->h_ffactor)) != 0) - goto err2; - - need_sync = 1; - } - -err2: /* Release the meta data page */ - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; -err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - /* Sync the file so that we know that the meta data goes to disk. */ - if (ret == 0 && need_sync) - ret = dbp->sync(dbp, 0); -#if CONFIG_TEST - if (ret == 0) - DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); - -DB_TEST_RECOVERY_LABEL -#endif - if (ret != 0) - (void)__ham_db_close(dbp); - - return (ret); -} - -/************************** LOCAL CREATION ROUTINES **********************/ -/* - * Returns 0 on No Error - */ -static int -__ham_init_htab(dbc, name, pgno, nelem, ffactor) +__ham_quick_delete(dbc) DBC *dbc; - const char *name; - db_pgno_t pgno; - u_int32_t nelem, ffactor; { - DB *dbp; - DB_LOCK metalock; - DB_LSN orig_lsn; - DBMETA *mmeta; - HASH_CURSOR *hcp; - HASH *hashp; - PAGE *h; - db_pgno_t mpgno; - int32_t l2, nbuckets; - int dirty_mmeta, i, ret, t_ret; - - hcp = (HASH_CURSOR *)dbc->internal; - dbp = dbc->dbp; - hashp = dbp->h_internal; - mmeta = NULL; - h = NULL; - ret = 0; - dirty_mmeta = 0; - metalock.off = LOCK_INVALID; - - if (hashp->h_hash == NULL) - hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; - - if (nelem != 0 && ffactor != 0) { - nelem = (nelem - 1) / ffactor + 1; - l2 = __db_log2(nelem > 2 ? nelem : 2); - } else - l2 = 1; - nbuckets = 1 << l2; - - orig_lsn = hcp->hdr->dbmeta.lsn; - memset(hcp->hdr, 0, sizeof(HMETA)); - ZERO_LSN(hcp->hdr->dbmeta.lsn); - hcp->hdr->dbmeta.pgno = pgno; - hcp->hdr->dbmeta.magic = DB_HASHMAGIC; - hcp->hdr->dbmeta.version = DB_HASHVERSION; - hcp->hdr->dbmeta.pagesize = dbp->pgsize; - hcp->hdr->dbmeta.type = P_HASHMETA; - hcp->hdr->dbmeta.free = PGNO_INVALID; - hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1; - hcp->hdr->low_mask = (nbuckets >> 1) - 1; - hcp->hdr->ffactor = ffactor; - hcp->hdr->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); - memcpy(hcp->hdr->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); - - if (F_ISSET(dbp, DB_AM_DUP)) - F_SET(&hcp->hdr->dbmeta, DB_HASH_DUP); - if (F_ISSET(dbp, DB_AM_SUBDB)) - F_SET(&hcp->hdr->dbmeta, DB_HASH_SUBDB); - if (dbp->dup_compare != NULL) - F_SET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT); - - if ((ret = memp_fset(dbp->mpf, hcp->hdr, DB_MPOOL_DIRTY)) != 0) - goto err; - - /* - * Create the first and second buckets pages so that we have the - * page numbers for them and we can store that page number - * in the meta-data header (spares[0]). - */ - hcp->hdr->spares[0] = nbuckets; - if ((ret = memp_fget(dbp->mpf, - &hcp->hdr->spares[0], DB_MPOOL_NEW_GROUP, &h)) != 0) - goto err; - - P_INIT(h, dbp->pgsize, hcp->hdr->spares[0], PGNO_INVALID, - PGNO_INVALID, 0, P_HASH); - - /* Fill in the last fields of the meta data page. 
*/ - hcp->hdr->spares[0] -= (nbuckets - 1); - for (i = 1; i <= l2; i++) - hcp->hdr->spares[i] = hcp->hdr->spares[0]; - for (; i < NCACHED; i++) - hcp->hdr->spares[i] = PGNO_INVALID; - - /* - * Before we are about to put any dirty pages, we need to log - * the meta-data page create. - */ - ret = __db_log_page(dbp, name, &orig_lsn, pgno, (PAGE *)hcp->hdr); - - if (dbp->open_txn != NULL) { - mmeta = (DBMETA *) hcp->hdr; - if (F_ISSET(dbp, DB_AM_SUBDB)) { - - /* - * If this is a subdatabase, then we need to - * get the LSN off the master meta data page - * because that's where free pages are linked - * and during recovery we need to access - * that page and roll it backward/forward - * correctly with respect to LSN. - */ - mpgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, - &mpgno, 0, (PAGE **)&mmeta)) != 0) - goto err; - } - if ((t_ret = __ham_groupalloc_log(dbp->dbenv, - dbp->open_txn, &LSN(mmeta), 0, dbp->log_fileid, - &LSN(mmeta), hcp->hdr->spares[0], - hcp->hdr->max_bucket + 1, mmeta->free)) != 0 && ret == 0) - ret = t_ret; - if (ret == 0) { - /* need to update real LSN for buffer manager */ - dirty_mmeta = 1; - } - - } - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); - -DB_TEST_RECOVERY_LABEL -err: if (h != NULL && - (t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - - if (F_ISSET(dbp, DB_AM_SUBDB) && mmeta != NULL) - if ((t_ret = memp_fput(dbp->mpf, mmeta, - dirty_mmeta ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) - ret = t_ret; - if (metalock.off != LOCK_INVALID) - (void)__TLPUT(dbc, metalock); - - return (ret); -} - -static int -__ham_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DBC *dbc; - HASH_CURSOR *hcp; - db_pgno_t pgno; int ret, t_ret; - /* - * This is the only access method routine called directly from - * the dbp, so we have to do error checking. - */ - - PANIC_CHECK(dbp->dbenv); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); - DB_CHECK_TXN(dbp, txn); - - if ((ret = - __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + if ((ret = __ham_get_meta(dbc)) != 0) return (ret); - DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags); + /* Assert that we're not using secondary indices. */ + DB_ASSERT(!F_ISSET(dbc->dbp, DB_AM_SECONDARY)); + /* + * We should assert that we're not a primary either, but that + * would require grabbing the dbp's mutex, so we don't bother. + */ - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; + /* Assert that we're set, but not to an off-page duplicate. */ + DB_ASSERT(IS_INITIALIZED(dbc)); + DB_ASSERT(((HASH_CURSOR *)dbc->internal)->opd == NULL); - pgno = PGNO_INVALID; - if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE, &pgno)) == 0) { - if (F_ISSET(hcp, H_OK)) { - if (pgno == PGNO_INVALID) - ret = __ham_del_pair(dbc, 1); - else { - /* When we close the cursor in __ham_del_dups, - * that will make the off-page dup tree go - * go away as well as our current entry. When - * it updates cursors, ours should get marked - * as H_DELETED. 
- */ - ret = __ham_del_dups(dbc, key); - } - } else - ret = DB_NOTFOUND; - } + ret = __ham_del_pair(dbc, 1); if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) ret = t_ret; -out: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; return (ret); } @@ -517,8 +141,8 @@ __ham_c_init(dbc) 1, sizeof(struct cursor_t), &new_curs)) != 0) return (ret); if ((ret = __os_malloc(dbenv, - dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) { - __os_free(new_curs, sizeof(*new_curs)); + dbc->dbp->pgsize, &new_curs->split_buf)) != 0) { + __os_free(dbenv, new_curs); return (ret); } @@ -527,8 +151,10 @@ __ham_c_init(dbc) dbc->c_count = __db_c_count; dbc->c_del = __db_c_del; dbc->c_dup = __db_c_dup; - dbc->c_get = __db_c_get; + dbc->c_get = dbc->c_real_get = __db_c_get; + dbc->c_pget = __db_c_pget; dbc->c_put = __db_c_put; + dbc->c_am_bulk = __ham_bulk; dbc->c_am_close = __ham_c_close; dbc->c_am_del = __ham_c_del; dbc->c_am_destroy = __ham_c_destroy; @@ -551,12 +177,14 @@ __ham_c_close(dbc, root_pgno, rmroot) db_pgno_t root_pgno; int *rmroot; { + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; HKEYDATA *dp; int doroot, gotmeta, ret, t_ret; u_int32_t dirty; COMPQUIET(rmroot, 0); + mpf = dbc->dbp->mpf; dirty = 0; doroot = gotmeta = ret = 0; hcp = (HASH_CURSOR *) dbc->internal; @@ -568,9 +196,14 @@ __ham_c_close(dbc, root_pgno, rmroot) gotmeta = 1; if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) goto out; - dp = (HKEYDATA *)H_PAIRDATA(hcp->page, hcp->indx); - DB_ASSERT(HPAGE_PTYPE(dp) == H_OFFDUP); - memcpy(&root_pgno, HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); + dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx); + + /* If its not a dup we aborted before we changed it. */ + if (HPAGE_PTYPE(dp) == H_OFFDUP) + memcpy(&root_pgno, + HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); + else + root_pgno = PGNO_INVALID; if ((ret = hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0) @@ -583,7 +216,7 @@ __ham_c_close(dbc, root_pgno, rmroot) } out: if (hcp->page != NULL && (t_ret = - memp_fput(dbc->dbp->mpf, hcp->page, dirty)) != 0 && ret == 0) + mpf->put(mpf, hcp->page, dirty)) != 0 && ret == 0) ret = t_ret; if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) ret = t_ret; @@ -605,8 +238,8 @@ __ham_c_destroy(dbc) hcp = (HASH_CURSOR *)dbc->internal; if (hcp->split_buf != NULL) - __os_free(hcp->split_buf, dbc->dbp->pgsize); - __os_free(hcp, sizeof(HASH_CURSOR)); + __os_free(dbc->dbp->dbenv, hcp->split_buf); + __os_free(dbc->dbp->dbenv, hcp); return (0); } @@ -623,6 +256,7 @@ __ham_c_count(dbc, recnop) db_recno_t *recnop; { DB *dbp; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; db_indx_t len; db_recno_t recno; @@ -630,22 +264,23 @@ __ham_c_count(dbc, recnop) u_int8_t *p, *pend; dbp = dbc->dbp; - hcp = (HASH_CURSOR *) dbc->internal; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; recno = 0; if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) return (ret); - switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) { + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { case H_KEYDATA: case H_OFFPAGE: recno = 1; break; case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); pend = p + - LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); for (; p < pend; recno++) { /* p may be odd, so copy rather than just dereffing */ memcpy(&len, p, sizeof(db_indx_t)); @@ -654,14 +289,13 @@ __ham_c_count(dbc, recnop) break; default: - ret = __db_unknown_type(dbp->dbenv, 
"__ham_c_count", - HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))); + ret = __db_pgfmt(dbp->dbenv, hcp->pgno); goto err; } *recnop = recno; -err: if ((t_ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0 && ret == 0) +err: if ((t_ret = mpf->put(mpf, hcp->page, 0)) != 0 && ret == 0) ret = t_ret; hcp->page = NULL; return (ret); @@ -673,10 +307,12 @@ __ham_c_del(dbc) { DB *dbp; DBT repldbt; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; int ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_DELETED)) @@ -689,12 +325,12 @@ __ham_c_del(dbc) goto out; /* Off-page duplicates. */ - if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) + if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) goto out; if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */ if (hcp->dup_off == 0 && - DUP_SIZE(hcp->dup_len) == LEN_HDATA(hcp->page, + DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page, hcp->hdr->dbmeta.pagesize, hcp->indx)) ret = __ham_del_pair(dbc, 1); else { @@ -703,21 +339,25 @@ __ham_c_del(dbc) repldbt.doff = hcp->dup_off; repldbt.dlen = DUP_SIZE(hcp->dup_len); repldbt.size = 0; - repldbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, + repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - ret = __ham_replpair(dbc, &repldbt, 0); - hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); - F_SET(hcp, H_DELETED); - ret = __ham_c_update(dbc, DUP_SIZE(hcp->dup_len), 0, 1); + if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) { + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); + ret = __ham_c_update(dbc, + DUP_SIZE(hcp->dup_len), 0, 1); + } } } else /* Not a duplicate */ ret = __ham_del_pair(dbc, 1); -out: if (ret == 0 && hcp->page != NULL && - (t_ret = memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) - ret = t_ret; - hcp->page = NULL; +out: if (hcp->page != NULL) { + if ((t_ret = mpf->put(mpf, + hcp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) && ret == 0) + ret = t_ret; + hcp->page = NULL; + } if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); @@ -760,7 +400,7 @@ __ham_c_dup(orig_dbc, new_dbc) * holds a lock of the correct type, so if we need a write lock and * request it, we know that we'll get it. */ - if (orig->lock.off == LOCK_INVALID || orig_dbc->txn != NULL) + if (!LOCK_ISSET(orig->lock) || orig_dbc->txn != NULL) return (0); return (__ham_lock_bucket(new_dbc, DB_LOCK_READ)); @@ -775,12 +415,14 @@ __ham_c_get(dbc, key, data, flags, pgnop) db_pgno_t *pgnop; { DB *dbp; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; db_lockmode_t lock_type; int get_key, ret, t_ret; hcp = (HASH_CURSOR *)dbc->internal; dbp = dbc->dbp; + mpf = dbp->mpf; /* Clear OR'd in additional bits so we can check for flag equality. 
*/ if (F_ISSET(dbc, DBC_RMW)) @@ -827,6 +469,7 @@ __ham_c_get(dbc, key, data, flags, pgnop) case DB_SET: case DB_SET_RANGE: case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); get_key = 0; break; @@ -856,11 +499,11 @@ __ham_c_get(dbc, key, data, flags, pgnop) goto err; else if (F_ISSET(hcp, H_OK)) { if (*pgnop == PGNO_INVALID) - ret = __ham_dup_return (dbc, data, flags); + ret = __ham_dup_return(dbc, data, flags); break; } else if (!F_ISSET(hcp, H_NOMORE)) { __db_err(dbp->dbenv, - "H_NOMORE returned to __ham_c_get"); + "H_NOMORE returned to __ham_c_get"); ret = EINVAL; break; } @@ -872,7 +515,7 @@ __ham_c_get(dbc, key, data, flags, pgnop) case DB_LAST: case DB_PREV: case DB_PREV_NODUP: - ret = memp_fput(dbp->mpf, hcp->page, 0); + ret = mpf->put(mpf, hcp->page, 0); hcp->page = NULL; if (hcp->bucket == 0) { ret = DB_NOTFOUND; @@ -890,7 +533,7 @@ __ham_c_get(dbc, key, data, flags, pgnop) case DB_FIRST: case DB_NEXT: case DB_NEXT_NODUP: - ret = memp_fput(dbp->mpf, hcp->page, 0); + ret = mpf->put(mpf, hcp->page, 0); hcp->page = NULL; hcp->indx = NDX_INVALID; hcp->bucket++; @@ -907,6 +550,7 @@ __ham_c_get(dbc, key, data, flags, pgnop) break; case DB_GET_BOTH: case DB_GET_BOTHC: + case DB_GET_BOTH_RANGE: case DB_NEXT_DUP: case DB_SET: case DB_SET_RANGE: @@ -940,6 +584,382 @@ err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) return (ret); } +/* + * __ham_bulk -- Return bulk data from a hash table. + */ +static int +__ham_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *cp; + PAGE *pg; + db_indx_t dup_len, dup_off, dup_tlen, indx, *inp; + db_lockmode_t lock_mode; + db_pgno_t pgno; + int32_t *endp, key_off, *offp, *saveoff; + u_int32_t key_size, size, space; + u_int8_t *dbuf, *dp, *hk, *np, *tmp; + int is_dup, is_key; + int need_pg, next_key, no_dup, pagesize, ret, t_ret; + + ret = 0; + key_off = 0; + dup_len = dup_off = dup_tlen = 0; + size = 0; + dbp = dbc->dbp; + pagesize = dbp->pgsize; + mpf = dbp->mpf; + cp = (HASH_CURSOR *)dbc->internal; + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; + dbuf = data->data; + np = dp = dbuf; + + /* Keep track of space that is left. There is an termination entry */ + space = data->ulen; + space -= sizeof(*offp); + + /* Build the offset/size table from the end up. */ + endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; + + key_size = 0; + lock_mode = F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE: DB_LOCK_READ; + +next_pg: + need_pg = 1; + indx = cp->indx; + pg = cp->page; + inp = P_INP(dbp, pg); + + do { + if (is_key) { + hk = H_PAIRKEY(dbp, pg, indx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) { + memcpy(&key_size, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + size = key_size; + if (key_size > space) + goto get_key_space; + if ((ret = __bam_bulk_overflow( + dbc, key_size, pgno, np)) != 0) + return (ret); + space -= key_size; + key_off = (int32_t)(np - dbuf); + np += key_size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +get_key_space: + if (offp == endp) { + data->size = + ALIGN(size + + pagesize, + sizeof(u_int32_t)); + return (ENOMEM); + } + goto back_up; + } + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + key_size = LEN_HKEY(dbp, pg, pagesize, indx); + key_off = (int32_t)(inp[indx] - HOFFSET(pg) + + dp - dbuf + SSZA(HKEYDATA, data)); + } + } + + hk = H_PAIRDATA(dbp, pg, indx); + switch (HPAGE_PTYPE(hk)) { + case H_DUPLICATE: + case H_KEYDATA: + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +back_up: + if (indx != 0) { + indx -= 2; + /* XXX + * It's not clear that this is + * the right way to fix this, + * but here goes. + * If we are backing up onto a + * duplicate, then we need to + * position ourselves at the + * end of the duplicate set. + * We probably need to make + * this work for H_OFFDUP too. + * It might be worth making a + * dummy cursor and calling + * __ham_item_prev. + */ + tmp = H_PAIRDATA(dbp, pg, indx); + if (HPAGE_PTYPE(tmp) == + H_DUPLICATE) { + dup_off = dup_tlen = + LEN_HDATA(dbp, pg, + pagesize, indx + 1); + memcpy(&dup_len, + HKEYDATA_DATA(tmp), + sizeof(db_indx_t)); + } + goto get_space; + } + /* indx == 0 */ + if ((ret = __ham_item_prev(dbc, + lock_mode, &pgno)) != 0) { + if (ret != DB_NOTFOUND) + return (ret); + if ((ret = mpf->put(mpf, + cp->page, 0)) != 0) + return (ret); + cp->page = NULL; + if (cp->bucket == 0) { + cp->indx = indx = + NDX_INVALID; + goto get_space; + } + if ((ret = + __ham_get_meta(dbc)) != 0) + return (ret); + + cp->bucket--; + cp->pgno = BUCKET_TO_PAGE(cp, + cp->bucket); + cp->indx = NDX_INVALID; + if ((ret = __ham_release_meta( + dbc)) != 0) + return (ret); + if ((ret = __ham_item_prev(dbc, + lock_mode, &pgno)) != 0) + return (ret); + } + indx = cp->indx; +get_space: + /* + * See if we put any data in the buffer. + */ + if (offp >= endp || + F_ISSET(dbc, DBC_TRANSIENT)) { + data->size = ALIGN(size + + data->ulen - space, + sizeof(u_int32_t)); + return (ENOMEM); + } + /* + * Don't continue; we're all out + * of space, even though we're + * returning success. + */ + next_key = 0; + break; + } + memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + + /* + * We're about to crack the offset(s) and length(s) + * out of an H_KEYDATA or H_DUPLICATE item. + * There are three cases: + * 1. We were moved into a duplicate set by + * the standard hash cursor code. Respect + * the dup_off and dup_tlen we were given. + * 2. We stumbled upon a duplicate set while + * walking the page on our own. We need to + * recognize it as a dup and set dup_off and + * dup_tlen. + * 3. The current item is not a dup. 
+ */ + if (F_ISSET(cp, H_ISDUP)) { + /* Case 1 */ + is_dup = 1; + dup_len = cp->dup_len; + dup_off = cp->dup_off; + dup_tlen = cp->dup_tlen; + } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) { + /* Case 2 */ + is_dup = 1; + /* + * If we run out of memory and bail, + * make sure the fact we're in a dup set + * isn't ignored later. + */ + F_SET(cp, H_ISDUP); + dup_off = 0; + memcpy(&dup_len, + HKEYDATA_DATA(hk), sizeof(db_indx_t)); + dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx); + } else + /* Case 3 */ + is_dup = dup_len = dup_off = dup_tlen = 0; + + do { + space -= (is_key ? 4 : 2) * sizeof(*offp); + size += (is_key ? 4 : 2) * sizeof(*offp); + /* + * Since space is an unsigned, if we happen + * to wrap, then this comparison will turn out + * to be true. XXX Wouldn't it be better to + * simply check above that space is greater than + * the value we're about to subtract??? + */ + if (space > data->ulen) { + if (!is_dup || dup_off == 0) + goto back_up; + dup_off -= (db_indx_t)DUP_SIZE(offp[1]); + goto get_space; + } + if (is_key) { + *offp-- = key_off; + *offp-- = key_size; + } + if (is_dup) { + *offp-- = (int32_t)( + inp[indx + 1] - HOFFSET(pg) + + dp - dbuf + SSZA(HKEYDATA, data) + + dup_off + sizeof(db_indx_t)); + memcpy(&dup_len, + HKEYDATA_DATA(hk) + dup_off, + sizeof(db_indx_t)); + dup_off += DUP_SIZE(dup_len); + *offp-- = dup_len; + } else { + *offp-- = (int32_t)( + inp[indx + 1] - HOFFSET(pg) + + dp - dbuf + SSZA(HKEYDATA, data)); + *offp-- = LEN_HDATA(dbp, pg, + pagesize, indx); + } + } while (is_dup && dup_off < dup_tlen && no_dup == 0); + F_CLR(cp, H_ISDUP); + break; + case H_OFFDUP: + memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + space -= 2 * sizeof(*offp); + if (space > data->ulen) + goto back_up; + + if (is_key) { + space -= 2 * sizeof(*offp); + if (space > data->ulen) + goto back_up; + *offp-- = key_off; + *offp-- = key_size; + } + saveoff = offp; + if ((ret = __bam_bulk_duplicates(dbc, + pgno, dbuf, is_key ? offp + 2 : NULL, + &offp, &np, &space, no_dup)) != 0) { + if (ret == ENOMEM) { + size = space; + if (is_key && saveoff == offp) { + offp += 2; + goto back_up; + } + goto get_space; + } + return (ret); + } + break; + case H_OFFPAGE: + space -= (is_key ? 4 : 2) * sizeof(*offp); + if (space > data->ulen) + goto back_up; + + memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if (size > space) + goto back_up; + + if ((ret = + __bam_bulk_overflow(dbc, size, pgno, np)) != 0) + return (ret); + + if (is_key) { + *offp-- = key_off; + *offp-- = key_size; + } + + *offp-- = (int32_t)(np - dbuf); + *offp-- = size; + + np += size; + space -= size; + break; + } + } while (next_key && (indx += 2) < NUM_ENT(pg)); + + cp->indx = indx; + cp->dup_len = dup_len; + cp->dup_off = dup_off; + cp->dup_tlen = dup_tlen; + + /* If we are off the page then try to the next page. */ + if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { + if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + if ((ret = mpf->put(dbc->dbp->mpf, cp->page, 0)) != 0) + return (ret); + cp->page = NULL; + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + + cp->bucket++; + if (cp->bucket > cp->hdr->max_bucket) { + /* + * Restore cursor to its previous state. We're past + * the last item in the last bucket, so the next + * DBC->c_get(DB_NEXT) will return DB_NOTFOUND. + */ + cp->bucket--; + ret = DB_NOTFOUND; + } else { + /* + * Start on the next bucket. 
+ * + * Note that if this new bucket happens to be empty, + * but there's another non-empty bucket after it, + * we'll return early. This is a rare case, and we + * don't guarantee any particular number of keys + * returned on each call, so just let the next call + * to bulk get move forward by yet another bucket. + */ + cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket); + cp->indx = NDX_INVALID; + F_CLR(cp, H_ISDUP); + ret = __ham_item_next(dbc, lock_mode, &pgno); + } + + if ((t_ret = __ham_release_meta(dbc)) != 0) + return (t_ret); + if (ret == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + } + *offp = (u_int32_t) -1; + return (0); +} + static int __ham_c_put(dbc, key, data, flags, pgnop) DBC *dbc; @@ -949,6 +969,7 @@ __ham_c_put(dbc, key, data, flags, pgnop) db_pgno_t *pgnop; { DB *dbp; + DB_MPOOLFILE *mpf; DBT tmp_val, *myval; HASH_CURSOR *hcp; u_int32_t nbytes; @@ -962,6 +983,7 @@ __ham_c_put(dbc, key, data, flags, pgnop) COMPQUIET(myval, NULL); dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_DELETED) && @@ -984,8 +1006,7 @@ __ham_c_put(dbc, key, data, flags, pgnop) ret = 0; if (hcp->seek_found_page != PGNO_INVALID && hcp->seek_found_page != hcp->pgno) { - if ((ret = memp_fput(dbp->mpf, hcp->page, 0)) - != 0) + if ((ret = mpf->put(mpf, hcp->page, 0)) != 0) goto err2; hcp->page = NULL; hcp->pgno = hcp->seek_found_page; @@ -1000,9 +1021,10 @@ __ham_c_put(dbc, key, data, flags, pgnop) * and then write the new bytes represented by * val. */ - if ((ret = __ham_init_dbt(dbp->dbenv, - &tmp_val, data->size + data->doff, - &dbc->rdata.data, &dbc->rdata.ulen)) == 0) { + if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val, + data->size + data->doff, + &dbc->my_rdata.data, + &dbc->my_rdata.ulen)) == 0) { memset(tmp_val.data, 0, data->doff); memcpy((u_int8_t *)tmp_val.data + data->doff, data->data, data->size); @@ -1038,8 +1060,8 @@ done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { F_CLR(hcp, H_EXPAND); } - if (ret == 0 && - (t_ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) + if (hcp->page != NULL && + (t_ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) @@ -1058,17 +1080,30 @@ __ham_expand_table(dbc) DBC *dbc; { DB *dbp; - PAGE *h; + DB_LOCK metalock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DBMETA *mmeta; HASH_CURSOR *hcp; - db_pgno_t pgno; - u_int32_t old_bucket, new_bucket; - int ret; + PAGE *h; + db_pgno_t pgno, mpgno; + u_int32_t newalloc, new_bucket, old_bucket; + int dirty_meta, got_meta, logn, new_double, ret; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; if ((ret = __ham_dirty_meta(dbc)) != 0) return (ret); + LOCK_INIT(metalock); + mmeta = (DBMETA *) hcp->hdr; + mpgno = mmeta->pgno; + h = NULL; + dirty_meta = 0; + got_meta = 0; + newalloc = 0; + /* * If the split point is about to increase, make sure that we * have enough extra pages. The calculation here is weird. @@ -1078,86 +1113,116 @@ __ham_expand_table(dbc) * see what the log of one greater than that is; here we have to * look at the log of max + 2. VERY NASTY STUFF. * - * It just got even nastier. With subdatabases, we have to request - * a chunk of contiguous pages, so we do that here using an - * undocumented feature of mpool (the MPOOL_NEW_GROUP flag) to - * give us a number of contiguous pages. Ouch. + * We figure out what we need to do, then we log it, then request + * the pages from mpool. We don't want to fail after extending + * the file. 
+ * + * If the page we are about to split into has already been allocated, + * then we simply need to get it to get its LSN. If it hasn't yet + * been allocated, then we know it's LSN (0,0). */ - if (hcp->hdr->max_bucket == hcp->hdr->high_mask) { - /* - * Ask mpool to give us a set of contiguous page numbers - * large enough to contain the next doubling. - * - * Figure out how many new pages we need. This will return - * us the last page. We calculate its page number, initialize - * the page and then write it back to reserve all the pages - * in between. It is possible that the allocation of new pages - * has already been done, but the tranaction aborted. Since - * we don't undo the allocation, check for a valid pgno before - * doing the allocation. - */ - pgno = hcp->hdr->max_bucket + 1; - if (hcp->hdr->spares[__db_log2(pgno) + 1] == PGNO_INVALID) - /* Allocate a group of pages. */ - ret = memp_fget(dbp->mpf, - &pgno, DB_MPOOL_NEW_GROUP, &h); - else { - /* Just read in the last page of the batch */ - pgno = hcp->hdr->spares[__db_log2(pgno) + 1] + - hcp->hdr->max_bucket + 1; - /* Move to the last page of the group. */ - pgno += hcp->hdr->max_bucket; - ret = memp_fget(dbp->mpf, - &pgno, DB_MPOOL_CREATE, &h); - } - if (ret != 0) - return (ret); - P_INIT(h, dbp->pgsize, pgno, - PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - pgno -= hcp->hdr->max_bucket; - } else { - pgno = BUCKET_TO_PAGE(hcp, hcp->hdr->max_bucket + 1); + new_bucket = hcp->hdr->max_bucket + 1; + old_bucket = new_bucket & hcp->hdr->low_mask; + + new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask; + logn = __db_log2(new_bucket); + + if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) { + /* Page exists; get it so we can get its LSN */ + pgno = BUCKET_TO_PAGE(hcp, new_bucket); if ((ret = - memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) - return (ret); + mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) + goto err; + lsn = h->lsn; + } else { + /* Get the master meta-data page to do allocation. */ + if (F_ISSET(dbp, DB_AM_SUBDB)) { + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = + mpf->get(mpf, &mpgno, 0, (PAGE **)&mmeta)) != 0) + goto err; + got_meta = 1; + } + pgno = mmeta->last_pgno + 1; + ZERO_LSN(lsn); + newalloc = 1; } - /* Now we can log the meta-data split. */ - if (DB_LOGGING(dbc)) { - if ((ret = __ham_metagroup_log(dbp->dbenv, - dbc->txn, &h->lsn, 0, dbp->log_fileid, - hcp->hdr->max_bucket, pgno, &hcp->hdr->dbmeta.lsn, - &h->lsn)) != 0) { - (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); - return (ret); - } + /* Log the meta-data split first. */ + if (DBC_LOGGING(dbc)) { + /* + * We always log the page number of the first page of + * the allocation group. However, the LSN that we log + * is either the LSN on the first page (if we did not + * do the actual allocation here) or the LSN on the last + * page of the unit (if we did do the allocation here). + */ + if ((ret = __ham_metagroup_log(dbp, dbc->txn, + &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn, + hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn, + pgno, &lsn, newalloc)) != 0) + goto err; + } else + LSN_NOT_LOGGED(lsn); - hcp->hdr->dbmeta.lsn = h->lsn; - } + hcp->hdr->dbmeta.lsn = lsn; - /* If we allocated some new pages, write out the last page. */ - if ((ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) - return (ret); + if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) { + /* + * We need to begin a new doubling and we have not allocated + * any pages yet. 
Read the last page in and initialize it to + * make the allocation contiguous. The pgno we calculated + * above is the first page allocated. The entry in spares is + * that page number minus any buckets already allocated (it + * simplifies bucket to page transaction). After we've set + * that, we calculate the last pgno. + */ + + hcp->hdr->spares[logn + 1] = pgno - new_bucket; + pgno += hcp->hdr->max_bucket; + mmeta->last_pgno = pgno; + mmeta->lsn = lsn; + dirty_meta = DB_MPOOL_DIRTY; - new_bucket = ++hcp->hdr->max_bucket; - old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask); + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) + goto err; + + P_INIT(h, dbp->pgsize, + pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + } + + /* Write out whatever page we ended up modifying. */ + h->lsn = lsn; + if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0) + goto err; + h = NULL; /* - * If we started a new doubling, fill in the spares array with - * the starting page number negatively offset by the bucket number. + * Update the meta-data page of this hash database. */ - if (new_bucket > hcp->hdr->high_mask) { - /* Starting a new doubling */ + hcp->hdr->max_bucket = new_bucket; + if (new_double) { hcp->hdr->low_mask = hcp->hdr->high_mask; hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; - if (hcp->hdr->spares[__db_log2(new_bucket) + 1] == PGNO_INVALID) - hcp->hdr->spares[__db_log2(new_bucket) + 1] = - pgno - new_bucket; } /* Relocate records to the new bucket */ - return (__ham_split_page(dbc, old_bucket, new_bucket)); + ret = __ham_split_page(dbc, old_bucket, new_bucket); + +err: if (got_meta) + (void)mpf->put(mpf, mmeta, dirty_meta); + + if (LOCK_ISSET(metalock)) + (void)__TLPUT(dbc, metalock); + + if (h != NULL) + (void)mpf->put(mpf, h, 0); + + return (ret); } /* @@ -1191,7 +1256,7 @@ __ham_call_hash(dbc, k, len) * everything held by the cursor. */ static int -__ham_dup_return (dbc, val, flags) +__ham_dup_return(dbc, val, flags) DBC *dbc; DBT *val; u_int32_t flags; @@ -1211,7 +1276,7 @@ __ham_dup_return (dbc, val, flags) dbp = dbc->dbp; hcp = (HASH_CURSOR *)dbc->internal; ndx = H_DATAINDEX(hcp->indx); - type = HPAGE_TYPE(hcp->page, ndx); + type = HPAGE_TYPE(dbp, hcp->page, ndx); pp = hcp->page; myval = val; @@ -1228,8 +1293,8 @@ __ham_dup_return (dbc, val, flags) DB_ASSERT(type != H_OFFDUP); /* Case 1 */ - if (type != H_DUPLICATE && - flags != DB_GET_BOTH && flags != DB_GET_BOTHC) + if (type != H_DUPLICATE && flags != DB_GET_BOTH && + flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE) return (0); /* @@ -1239,11 +1304,11 @@ __ham_dup_return (dbc, val, flags) */ if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) { F_SET(hcp, H_ISDUP); - hcp->dup_tlen = LEN_HDATA(hcp->page, + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, hcp->hdr->dbmeta.pagesize, hcp->indx); - hk = H_PAIRDATA(hcp->page, hcp->indx); - if (flags == DB_LAST - || flags == DB_PREV || flags == DB_PREV_NODUP) { + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (flags == DB_LAST || + flags == DB_PREV || flags == DB_PREV_NODUP) { hcp->dup_off = 0; do { memcpy(&len, @@ -1265,7 +1330,8 @@ __ham_dup_return (dbc, val, flags) * may need to adjust the cursor before returning data. 
* Case 4 */ - if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) { + if (flags == DB_GET_BOTH || + flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { if (F_ISSET(hcp, H_ISDUP)) { /* * If we're doing a join, search forward from the @@ -1274,7 +1340,7 @@ __ham_dup_return (dbc, val, flags) if (flags == DB_GET_BOTHC) F_SET(hcp, H_CONTINUE); - __ham_dsearch(dbc, val, &off, &cmp); + __ham_dsearch(dbc, val, &off, &cmp, flags); /* * This flag is set nowhere else and is safe to @@ -1283,7 +1349,7 @@ __ham_dup_return (dbc, val, flags) F_CLR(hcp, H_CONTINUE); hcp->dup_off = off; } else { - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); if (((HKEYDATA *)hk)->type == H_OFFPAGE) { memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); @@ -1298,7 +1364,7 @@ __ham_dup_return (dbc, val, flags) * routines may only look at data and size. */ tmp_val.data = HKEYDATA_DATA(hk); - tmp_val.size = LEN_HDATA(hcp->page, + tmp_val.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); cmp = dbp->dup_compare == NULL ? __bam_defcmp(dbp, &tmp_val, val) : @@ -1311,6 +1377,18 @@ __ham_dup_return (dbc, val, flags) } /* + * If we're doing a bulk get, we don't want to actually return + * the data: __ham_bulk will take care of cracking out the + * duplicates appropriately. + * + * The rest of this function calculates partial offsets and + * handles the actual __db_ret, so just return if + * DB_MULTIPLE(_KEY) is set. + */ + if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY)) + return (0); + + /* * Now, everything is initialized, grab a duplicate if * necessary. */ @@ -1351,8 +1429,8 @@ __ham_dup_return (dbc, val, flags) * Finally, if we had a duplicate, pp, ndx, and myval should be * set appropriately. */ - if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data, - &dbc->rdata.ulen)) != 0) + if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata->data, + &dbc->rdata->ulen)) != 0) return (ret); /* @@ -1374,6 +1452,7 @@ __ham_overwrite(dbc, nval, flags) u_int32_t flags; { DB *dbp; + DB_ENV *dbenv; HASH_CURSOR *hcp; DBT *myval, tmp_val, tmp_val2; void *newrec; @@ -1383,6 +1462,7 @@ __ham_overwrite(dbc, nval, flags) int ret; dbp = dbc->dbp; + dbenv = dbp->dbenv; hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_ISDUP)) { /* @@ -1399,7 +1479,7 @@ __ham_overwrite(dbc, nval, flags) */ memset(&tmp_val, 0, sizeof(tmp_val)); if ((ret = - __ham_dup_return (dbc, &tmp_val, DB_CURRENT)) != 0) + __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0) return (ret); /* Figure out new size. */ @@ -1435,7 +1515,7 @@ __ham_overwrite(dbc, nval, flags) } if ((ret = __os_malloc(dbp->dbenv, - DUP_SIZE(newsize), NULL, &newrec)) != 0) + DUP_SIZE(newsize), &newrec)) != 0) return (ret); memset(&tmp_val2, 0, sizeof(tmp_val2)); F_SET(&tmp_val2, DB_DBT_PARTIAL); @@ -1483,8 +1563,7 @@ __ham_overwrite(dbc, nval, flags) tmp_val2.size = newsize; if (dbp->dup_compare( dbp, &tmp_val, &tmp_val2) != 0) { - (void)__os_free(newrec, - DUP_SIZE(newsize)); + (void)__os_free(dbenv, newrec); return (__db_duperr(dbp, flags)); } } @@ -1495,7 +1574,7 @@ __ham_overwrite(dbc, nval, flags) tmp_val2.dlen = DUP_SIZE(hcp->dup_len); ret = __ham_replpair(dbc, &tmp_val2, 0); - (void)__os_free(newrec, DUP_SIZE(newsize)); + (void)__os_free(dbenv, newrec); /* Update cursor */ if (ret != 0) @@ -1520,7 +1599,7 @@ __ham_overwrite(dbc, nval, flags) /* Make sure we maintain sort order. 
*/ if (dbp->dup_compare != NULL) { tmp_val2.data = - HKEYDATA_DATA(H_PAIRDATA(hcp->page, + HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + hcp->dup_off + sizeof(db_indx_t); tmp_val2.size = hcp->dup_len; @@ -1529,8 +1608,8 @@ __ham_overwrite(dbc, nval, flags) } /* Overwriting a complete duplicate. */ if ((ret = - __ham_make_dup(dbp->dbenv, nval, - &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + __ham_make_dup(dbp->dbenv, nval, &tmp_val, + &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) return (ret); /* Now fix what we are replacing. */ tmp_val.doff = hcp->dup_off; @@ -1541,7 +1620,7 @@ __ham_overwrite(dbc, nval, flags) hcp->dup_tlen += (nval->size - hcp->dup_len); else hcp->dup_tlen -= (hcp->dup_len - nval->size); - hcp->dup_len = DUP_SIZE(nval->size); + hcp->dup_len = (db_indx_t)DUP_SIZE(nval->size); } myval = &tmp_val; } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { @@ -1549,12 +1628,12 @@ __ham_overwrite(dbc, nval, flags) memcpy(&tmp_val, nval, sizeof(*nval)); F_SET(&tmp_val, DB_DBT_PARTIAL); tmp_val.doff = 0; - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); if (HPAGE_PTYPE(hk) == H_OFFPAGE) memcpy(&tmp_val.dlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); else - tmp_val.dlen = LEN_HDATA(hcp->page, + tmp_val.dlen = LEN_HDATA(dbp, hcp->page, hcp->hdr->dbmeta.pagesize, hcp->indx); myval = &tmp_val; } else @@ -1601,7 +1680,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop) hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - while (1) { + for (;;) { *pgnop = PGNO_INVALID; if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0) return (ret); @@ -1609,7 +1688,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop) if (F_ISSET(hcp, H_NOMORE)) break; - hk = H_PAIRKEY(hcp->page, hcp->indx); + hk = H_PAIRKEY(dbp, hcp->page, hcp->indx); switch (HPAGE_PTYPE(hk)) { case H_OFFPAGE: memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); @@ -1625,12 +1704,12 @@ __ham_lookup(dbc, key, sought, mode, pgnop) break; case H_KEYDATA: if (key->size == - LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx) && + LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx) && memcmp(key->data, HKEYDATA_DATA(hk), key->size) == 0) { /* Found the key, check for data type. */ found_key: F_SET(hcp, H_OK); - dk = H_PAIRDATA(hcp->page, hcp->indx); + dk = H_PAIRDATA(dbp, hcp->page, hcp->indx); if (HPAGE_PTYPE(dk) == H_OFFDUP) memcpy(pgnop, HOFFDUP_PGNO(dk), sizeof(db_pgno_t)); @@ -1643,7 +1722,7 @@ found_key: F_SET(hcp, H_OK); * These are errors because keys are never * duplicated, only data items are. */ - return (__db_pgfmt(dbp, PGNO(hcp->page))); + return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page))); } } @@ -1677,7 +1756,7 @@ __ham_init_dbt(dbenv, dbt, size, bufp, sizep) memset(dbt, 0, sizeof(*dbt)); if (*sizep < size) { - if ((ret = __os_realloc(dbenv, size, NULL, bufp)) != 0) { + if ((ret = __os_realloc(dbenv, size, bufp)) != 0) { *sizep = 0; return (ret); } @@ -1732,8 +1811,8 @@ __ham_c_update(dbc, len, add, is_dup) MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); /* - * Calcuate the order of this deleted record. - * This will be one grater than any cursor that is pointing + * Calculate the order of this deleted record. + * This will be one greater than any cursor that is pointing * at this record and already marked as deleted. 
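The order calculation described above boils down to one pass over the open cursors: every cursor already marked deleted on the same page and index keeps its order, and the new deletion takes the largest of those plus one. A minimal stand-alone sketch of that pass follows; toy_cursor and next_delete_order are invented names standing in for HASH_CURSOR and the loop in __ham_c_update, and the dup_off check is left out for brevity.

#include <stdio.h>

/* Simplified stand-in for the handful of HASH_CURSOR fields used here. */
struct toy_cursor {
    unsigned long pgno;     /* page the cursor sits on */
    unsigned long indx;     /* index within that page */
    int deleted;            /* analogous to the H_DELETED flag */
    unsigned int order;     /* deletion order, as in the hunk above */
};

/* Order to assign to a new deletion at (pgno, indx). */
static unsigned int
next_delete_order(const struct toy_cursor *list, int n,
    unsigned long pgno, unsigned long indx)
{
    unsigned int order = 0;
    int i;

    for (i = 0; i < n; i++)
        if (list[i].deleted &&
            list[i].pgno == pgno && list[i].indx == indx &&
            order <= list[i].order)
            order = list[i].order + 1;
    return (order);
}

int
main(void)
{
    struct toy_cursor cursors[] = {
        { 7, 2, 1, 0 },     /* already deleted at (7,2), order 0 */
        { 7, 2, 1, 1 },     /* already deleted at (7,2), order 1 */
        { 7, 4, 0, 0 },     /* different item, ignored */
    };

    /* A new deletion at (7,2) is assigned order 2. */
    printf("new order = %u\n", next_delete_order(cursors, 3, 7, 2));
    return (0);
}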
*/ order = 0; @@ -1749,11 +1828,11 @@ __ham_c_update(dbc, len, add, is_dup) continue; lcp = (HASH_CURSOR *)cp->internal; if (F_ISSET(lcp, H_DELETED) && - hcp->pgno == lcp->pgno && - hcp->indx == lcp->indx && - order <= lcp->order && - (!is_dup || hcp->dup_off == lcp->dup_off)) - order = lcp->order +1; + hcp->pgno == lcp->pgno && + hcp->indx == lcp->indx && + order <= lcp->order && + (!is_dup || hcp->dup_off == lcp->dup_off)) + order = lcp->order + 1; } MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); } @@ -1788,8 +1867,8 @@ __ham_c_update(dbc, len, add, is_dup) * We are "undeleting" so unmark all * cursors with the same order. */ - if (lcp->indx == hcp->indx - && F_ISSET(lcp, H_DELETED)) { + if (lcp->indx == hcp->indx && + F_ISSET(lcp, H_DELETED)) { if (lcp->order == hcp->order) F_CLR(lcp, H_DELETED); else if (lcp->order > @@ -1815,12 +1894,13 @@ __ham_c_update(dbc, len, add, is_dup) } else { if (lcp->indx > hcp->indx) { lcp->indx -= 2; - if (lcp->indx == hcp->indx - && F_ISSET(lcp, H_DELETED)) + if (lcp->indx == hcp->indx && + F_ISSET(lcp, H_DELETED)) lcp->order += order; - } else if (lcp->indx == hcp->indx - && !F_ISSET(lcp, H_DELETED)) { + } else if (lcp->indx == hcp->indx && + !F_ISSET(lcp, H_DELETED)) { F_SET(lcp, H_DELETED); + F_CLR(lcp, H_ISDUP); lcp->order = order; } } @@ -1833,10 +1913,10 @@ __ham_c_update(dbc, len, add, is_dup) */ if (add) { lcp->dup_tlen += len; - if (lcp->dup_off == hcp->dup_off - && F_ISSET(hcp, H_DELETED) - && F_ISSET(lcp, H_DELETED)) { - /* Abort of a delete. */ + if (lcp->dup_off == hcp->dup_off && + F_ISSET(hcp, H_DELETED) && + F_ISSET(lcp, H_DELETED)) { + /* Abort of a delete. */ if (lcp->order == hcp->order) F_CLR(lcp, H_DELETED); else if (lcp->order > @@ -1851,8 +1931,9 @@ __ham_c_update(dbc, len, add, is_dup) lcp->dup_tlen -= len; if (lcp->dup_off > hcp->dup_off) { lcp->dup_off -= len; - if (lcp->dup_off == hcp->dup_off - && F_ISSET(lcp, H_DELETED)) + if (lcp->dup_off == + hcp->dup_off && + F_ISSET(lcp, H_DELETED)) lcp->order += order; } else if (lcp->dup_off == hcp->dup_off && @@ -1867,10 +1948,9 @@ __ham_c_update(dbc, len, add, is_dup) } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - if (found != 0 && DB_LOGGING(dbc)) { - if ((ret = __ham_curadj_log(dbenv, - my_txn, &lsn, 0, dbp->log_fileid, hcp->pgno, - hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0) + if (found != 0 && DBC_LOGGING(dbc)) { + if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno, + hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0) return (ret); } @@ -1885,13 +1965,12 @@ __ham_c_update(dbc, len, add, is_dup) * cursors on a split. The latter is so we can update cursors when we * move items off page. * - * PUBLIC: int __ham_get_clist __P((DB *, - * PUBLIC: db_pgno_t, u_int32_t, DBC ***)); + * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); */ int -__ham_get_clist(dbp, bucket, indx, listp) +__ham_get_clist(dbp, pgno, indx, listp) DB *dbp; - db_pgno_t bucket; + db_pgno_t pgno; u_int32_t indx; DBC ***listp; { @@ -1915,18 +1994,20 @@ __ham_get_clist(dbp, bucket, indx, listp) MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; cp = TAILQ_NEXT(cp, links)) - if (cp->dbtype == DB_HASH && - ((indx == NDX_INVALID && - ((HASH_CURSOR *)(cp->internal))->bucket - == bucket) || (indx != NDX_INVALID && - cp->internal->pgno == bucket && - cp->internal->indx == indx))) { + /* + * We match if cp->pgno matches the specified + * pgno, and if either the cp->indx matches + * or we weren't given an index. 
+ */ + if (cp->internal->pgno == pgno && + (indx == NDX_INVALID || + cp->internal->indx == indx)) { if (nused >= nalloc) { nalloc += 10; if ((ret = __os_realloc(dbp->dbenv, nalloc * sizeof(HASH_CURSOR *), - NULL, listp)) != 0) - return (ret); + listp)) != 0) + goto err; } (*listp)[nused++] = cp; } @@ -1939,74 +2020,25 @@ __ham_get_clist(dbp, bucket, indx, listp) if (nused >= nalloc) { nalloc++; if ((ret = __os_realloc(dbp->dbenv, - nalloc * sizeof(HASH_CURSOR *), NULL, listp)) != 0) + nalloc * sizeof(HASH_CURSOR *), listp)) != 0) return (ret); } (*listp)[nused] = NULL; } return (0); -} - -static int -__ham_del_dups(orig_dbc, key) - DBC *orig_dbc; - DBT *key; -{ - DBC *dbc; - DBT data, lkey; - int ret, t_ret; - - /* Allocate a cursor. */ - if ((ret = orig_dbc->c_dup(orig_dbc, &dbc, 0)) != 0) - return (ret); - - /* - * Walk a cursor through the key/data pairs, deleting as we go. Set - * the DB_DBT_USERMEM flag, as this might be a threaded application - * and the flags checking will catch us. We don't actually want the - * keys or data, so request a partial of length 0. - */ - memset(&lkey, 0, sizeof(lkey)); - F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL); - memset(&data, 0, sizeof(data)); - F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL); - - /* Walk through the set of key/data pairs, deleting as we go. */ - if ((ret = dbc->c_get(dbc, key, &data, DB_SET)) != 0) { - if (ret == DB_NOTFOUND) - ret = 0; - goto err; - } - - for (;;) { - if ((ret = dbc->c_del(dbc, 0)) != 0) - goto err; - if ((ret = dbc->c_get(dbc, &lkey, &data, DB_NEXT_DUP)) != 0) { - if (ret == DB_NOTFOUND) { - ret = 0; - break; - } - goto err; - } - } - -err: /* - * Discard the cursor. This will cause the underlying off-page dup - * tree to go away as well as the actual entry on the page. - */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - +err: + MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); return (ret); - } static int __ham_c_writelock(dbc) DBC *dbc; { - HASH_CURSOR *hcp; + DB_ENV *dbenv; DB_LOCK tmp_lock; + HASH_CURSOR *hcp; int ret; /* @@ -2017,79 +2049,13 @@ __ham_c_writelock(dbc) return (0); hcp = (HASH_CURSOR *)dbc->internal; - if ((hcp->lock.off == LOCK_INVALID || hcp->lock_mode == DB_LOCK_READ)) { + if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode == DB_LOCK_READ)) { tmp_lock = hcp->lock; if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0) return (ret); - if (tmp_lock.off != LOCK_INVALID && - (ret = lock_put(dbc->dbp->dbenv, &tmp_lock)) != 0) - return (ret); - } - return (0); -} - -/* - * __ham_c_chgpg -- - * - * Adjust the cursors after moving an item from one page to another. - * If the old_index is NDX_INVALID, that means that we copied the - * page wholesale and we're leaving indices intact and just changing - * the page number. - * - * PUBLIC: int __ham_c_chgpg - * PUBLIC: __P((DBC *, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); - */ -int -__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) - DBC *dbc; - db_pgno_t old_pgno, new_pgno; - u_int32_t old_index, new_index; -{ - DB *dbp, *ldbp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - DBC *cp; - HASH_CURSOR *hcp; - int found, ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? 
dbc->txn : NULL; - found = 0; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - hcp = (HASH_CURSOR *)cp->internal; - if (hcp->pgno == old_pgno) { - if (old_index == NDX_INVALID) { - hcp->pgno = new_pgno; - } else if (hcp->indx == old_index) { - hcp->pgno = new_pgno; - hcp->indx = new_index; - } else - continue; - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - } - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - - if (found != 0 && DB_LOGGING(dbc)) { - if ((ret = __ham_chgpg_log(dbenv, - my_txn, &lsn, 0, dbp->log_fileid, DB_HAM_CHGPG, - old_pgno, new_pgno, old_index, new_index)) != 0) + dbenv = dbc->dbp->dbenv; + if (LOCK_ISSET(tmp_lock) && + (ret = dbenv->lock_put(dbenv, &tmp_lock)) != 0) return (ret); } return (0); diff --git a/bdb/hash/hash.src b/bdb/hash/hash.src index e6ecd11c907..b4b633c56e6 100644 --- a/bdb/hash/hash.src +++ b/bdb/hash/hash.src @@ -1,8 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. + * + * $Id: hash.src,v 10.38 2002/04/17 19:03:10 krinsky Exp $ */ /* * Copyright (c) 1995, 1996 @@ -38,44 +40,10 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $Id: hash.src,v 10.24 2000/12/12 17:41:48 bostic Exp $ - */ - -/* - * This is the source file used to create the logging functions for the - * hash package. Each access method (or set of routines wishing to register - * record types with the transaction system) should have a file like this. - * Each type of log record and its parameters is defined. The basic - * format of a record definition is: - * - * BEGIN <RECORD_TYPE> - * ARG|STRING|POINTER <variable name> <variable type> <printf format> - * ... - * END - * ARG the argument is a simple parameter of the type * specified. - * DBT the argument is a DBT (db.h) containing a length and pointer. - * PTR the argument is a pointer to the data type specified; the entire - * type should be logged. - * - * There are a set of shell scripts of the form xxx.sh that generate c - * code and or h files to process these. (This is probably better done - * in a single PERL script, but for now, this works.) - * - * The DB recovery system requires the following three fields appear in - * every record, and will assign them to the per-record-type structures - * as well as making them the first parameters to the appropriate logging - * call. - * rectype: record-type, identifies the structure and log/read call - * txnid: transaction id, a DBT in this implementation - * prev: the last LSN for this transaction */ -/* - * Use the argument of PREFIX as the prefix for all record types, - * routines, id numbers, etc. 
- */ -PREFIX ham +PREFIX __ham +DBPRIVATE INCLUDE #include "db_config.h" INCLUDE @@ -83,16 +51,18 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES INCLUDE #include <sys/types.h> INCLUDE INCLUDE #include <ctype.h> -INCLUDE #include <errno.h> INCLUDE #include <string.h> INCLUDE #endif INCLUDE INCLUDE #include "db_int.h" -INCLUDE #include "db_page.h" -INCLUDE #include "db_dispatch.h" -INCLUDE #include "db_am.h" -INCLUDE #include "hash.h" -INCLUDE #include "txn.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/hash.h" +INCLUDE #include "dbinc/rep.h" +INCLUDE #include "dbinc/log.h" +INCLUDE #include "dbinc/txn.h" INCLUDE /* @@ -109,8 +79,8 @@ INCLUDE */ BEGIN insdel 21 ARG opcode u_int32_t lu -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu ARG ndx u_int32_t lu POINTER pagelsn DB_LSN * lu DBT key DBT s @@ -129,46 +99,26 @@ END */ BEGIN newpage 22 ARG opcode u_int32_t lu -ARG fileid int32_t ld -ARG prev_pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCKNZ prev_pgno db_pgno_t lu POINTER prevlsn DB_LSN * lu -ARG new_pgno db_pgno_t lu +WRLOCKNZ new_pgno db_pgno_t lu POINTER pagelsn DB_LSN * lu -ARG next_pgno db_pgno_t lu +WRLOCKNZ next_pgno db_pgno_t lu POINTER nextlsn DB_LSN * lu END /* - * DEPRECATED in 3.0. - * Superceded by metagroup which allocates a group of new pages. - * - * Splitting requires two types of log messages. The first logs the - * meta-data of the split. - * - * For the meta-data split - * bucket: max_bucket in table before split - * ovflpoint: overflow point before split. - * spares: spares[ovflpoint] before split. - */ -DEPRECATED splitmeta 23 -ARG fileid int32_t ld -ARG bucket u_int32_t lu -ARG ovflpoint u_int32_t lu -ARG spares u_int32_t lu -POINTER metalsn DB_LSN * lu -END - -/* * Splitting requires two types of log messages. The second logs the * data on the original page. To redo the split, we have to visit the * new page (pages) and add the items back on the page if they are not * yet there. */ BEGIN splitdata 24 -ARG fileid int32_t ld +DB fileid int32_t ld ARG opcode u_int32_t lu -ARG pgno db_pgno_t lu -DBT pageimage DBT s +WRLOCK pgno db_pgno_t lu +PGDBT pageimage DBT s POINTER pagelsn DB_LSN * lu END @@ -185,8 +135,8 @@ END * makedup - this was a replacement that made an item a duplicate. */ BEGIN replace 25 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu ARG ndx u_int32_t lu POINTER pagelsn DB_LSN * lu ARG off int32_t ld @@ -196,52 +146,6 @@ ARG makedup u_int32_t lu END /* - * DEPRECATED in 3.0. - * Hash now uses the btree allocation and deletion page routines. - * - * HASH-newpgno: is used to record getting/deleting a new page number. - * This doesn't require much data modification, just modifying the - * meta-data. - * pgno is the page being allocated/freed. - * free_pgno is the next_pgno on the free list. - * old_type was the type of a page being deallocated. - * old_pgno was the next page number before the deallocation. - */ -DEPRECATED newpgno 26 -ARG opcode u_int32_t lu -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -ARG free_pgno db_pgno_t lu -ARG old_type u_int32_t lu -ARG old_pgno db_pgno_t lu -ARG new_type u_int32_t lu -POINTER pagelsn DB_LSN * lu -POINTER metalsn DB_LSN * lu -END - -/* - * DEPRECATED in 3.0. - * Since we now pre-allocate the contiguous chunk of pages for a doubling, - * there is no big benefit to pre-allocating a few extra pages. 
It used - * to be that the file was only physically as large as the current bucket, - * so if you were on a doubling of 16K, but were only on the first bucket - * of that 16K, the file was much shorter than it would be at the end of - * the doubling, so we didn't want to force overflow pages at the end of the - * 16K pages. Since we now must allocate the 16K pages (because of sub - * databases), it's not a big deal to tack extra pages on at the end. - * - * ovfl: initialize a set of overflow pages. - */ -DEPRECATED ovfl 27 -ARG fileid int32_t ld -ARG start_pgno db_pgno_t lu -ARG npages u_int32_t lu -ARG free_pgno db_pgno_t lu -ARG ovflpoint u_int32_t lu -POINTER metalsn DB_LSN * lu -END - -/* * Used when we empty the first page in a bucket and there are pages after * it. The page after it gets copied into the bucket page (since bucket * pages have to be in fixed locations). @@ -252,33 +156,46 @@ END * nnextlsn: the LSN of nnext_pgno. */ BEGIN copypage 28 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu POINTER pagelsn DB_LSN * lu -ARG next_pgno db_pgno_t lu +WRLOCK next_pgno db_pgno_t lu POINTER nextlsn DB_LSN * lu -ARG nnext_pgno db_pgno_t lu +WRLOCKNZ nnext_pgno db_pgno_t lu POINTER nnextlsn DB_LSN * lu -DBT page DBT s +PGDBT page DBT s END /* - * This replaces the old splitmeta operation. It behaves largely the same - * way, but it has enough information so that we can record a group allocation - * which we do now because of sub databases. The number of pages allocated is - * always bucket + 1 pgno is the page number of the first newly allocated - * bucket. + * This record logs the meta-data aspects of a split operation. It has enough + * information so that we can record both an individual page allocation as well + * as a group allocation which we do because in sub databases, the pages in + * a hash doubling, must be contiguous. If we do a group allocation, the + * number of pages allocated is bucket + 1, pgno is the page number of the + * first newly allocated bucket. + * * bucket: Old maximum bucket number. - * pgno: Page allocated to bucket + 1 (first newly allocated page) + * mmpgno: Master meta-data page number (0 if same as mpgno). + * mmetalsn: Lsn of the master meta-data page. + * mpgno: Meta-data page number. * metalsn: Lsn of the meta-data page. - * pagelsn: Lsn of the maximum page allocated. + * pgno: Page allocated to bucket + 1 (first newly allocated page) + * pagelsn: Lsn of either the first page allocated (if newalloc == 0) or + * the last page allocated (if newalloc == 1). + * newalloc: 1 indicates that this record did the actual allocation; + * 0 indicates that the pages were already allocated from a + * previous (failed) allocation. 
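The doubling arithmetic this record describes, together with the spares[logn + 1] = pgno - new_bucket assignment in the hash.c hunk earlier, reduces bucket-to-page translation to one addition: spares[d] stores the first page of doubling d minus its first bucket. The sketch below shows that arithmetic in isolation; ceil_log2, bucket_to_page and the sample spares values are invented for the example, the spares indexing follows my reading of the comment, and the real code uses __db_log2 and the BUCKET_TO_PAGE macro on the meta-data page.

#include <stdio.h>

/* Smallest p with (1 << p) >= n; plays the role of __db_log2. */
static unsigned int
ceil_log2(unsigned int n)
{
    unsigned int p, v;

    for (p = 0, v = 1; v < n; ++p)
        v <<= 1;
    return (p);
}

/* spares[d] = first page of doubling d minus first bucket of doubling d. */
static unsigned long
bucket_to_page(const unsigned long *spares, unsigned int bucket)
{
    return (bucket + spares[ceil_log2(bucket + 1)]);
}

int
main(void)
{
    /*
     * Hypothetical layout: meta page 0, buckets 0-1 on pages 1-2, and
     * the doubling for buckets 2-3 allocated contiguously at pages 3-4,
     * so spares[2] = 3 - 2 = 1, exactly the "pgno - new_bucket" above.
     */
    unsigned long spares[] = { 1, 1, 1, 0, 0, 0, 0, 0 };
    unsigned int b;

    for (b = 0; b < 4; b++)
        printf("bucket %u -> page %lu\n", b, bucket_to_page(spares, b));
    return (0);
}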
*/ BEGIN metagroup 29 -ARG fileid int32_t ld +DB fileid int32_t ld ARG bucket u_int32_t lu -ARG pgno db_pgno_t lu +WRLOCK mmpgno db_pgno_t lu +POINTER mmetalsn DB_LSN * lu +WRLOCKNZ mpgno db_pgno_t lu POINTER metalsn DB_LSN * lu +WRLOCK pgno db_pgno_t lu POINTER pagelsn DB_LSN * lu +ARG newalloc u_int32_t lu END /* @@ -293,28 +210,10 @@ END * start_pgno: starting page number * num: number of allocated pages */ -DEPRECATED groupalloc1 30 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER metalsn DB_LSN * lu -POINTER mmetalsn DB_LSN * lu -ARG start_pgno db_pgno_t lu -ARG num u_int32_t lu -END - -DEPRECATED groupalloc2 31 -ARG fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -POINTER alloc_lsn DB_LSN * lu -ARG start_pgno db_pgno_t lu -ARG num u_int32_t lu -ARG free db_pgno_t lu -END - BEGIN groupalloc 32 -ARG fileid int32_t ld +DB fileid int32_t ld POINTER meta_lsn DB_LSN * lu -ARG start_pgno db_pgno_t lu +WRLOCK start_pgno db_pgno_t lu ARG num u_int32_t lu ARG free db_pgno_t lu END @@ -329,7 +228,7 @@ END * dup_off - if a dup its offset * add - 1 if add 0 if delete * is_dup - 1 if dup 0 otherwise. - * order - order assinged to this deleted record or dup. + * order - order assigned to this deleted record or dup. * * chgpg - rmoved a page, move the records to a new page * mode - CHGPG page was deleted or records move to new page. @@ -338,9 +237,15 @@ END * old_pgno, new_pgno - old and new page numbers. * old_index, new_index - old and new index numbers, NDX_INVALID if * it effects all records on the page. + * For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG, + * and DELLASTPG), we overload old_indx and new_indx to avoid + * needing a new log record type: old_indx stores the only + * indx of interest to these records, and new_indx stores the + * order that's assigned to the lowest deleted record we're + * moving. */ BEGIN curadj 33 -ARG fileid int32_t ld +DB fileid int32_t ld ARG pgno db_pgno_t lu ARG indx u_int32_t lu ARG len u_int32_t lu @@ -351,7 +256,7 @@ ARG order u_int32_t lu END BEGIN chgpg 34 -ARG fileid int32_t ld +DB fileid int32_t ld ARG mode db_ham_mode ld ARG old_pgno db_pgno_t lu ARG new_pgno db_pgno_t lu diff --git a/bdb/hash/hash_conv.c b/bdb/hash/hash_conv.c index 30d17a6164d..a93e56a2ee4 100644 --- a/bdb/hash/hash_conv.c +++ b/bdb/hash/hash_conv.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_conv.c,v 11.5 2000/03/31 00:30:32 ubell Exp $"; +static const char revid[] = "$Id: hash_conv.c,v 11.13 2002/08/06 05:34:35 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -15,20 +15,21 @@ static const char revid[] = "$Id: hash_conv.c,v 11.5 2000/03/31 00:30:32 ubell E #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "hash.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/hash.h" /* * __ham_pgin -- * Convert host-specific page layout from the host-independent format * stored on disk. 
* - * PUBLIC: int __ham_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); + * PUBLIC: int __ham_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); */ int -__ham_pgin(dbenv, pg, pp, cookie) +__ham_pgin(dbenv, dummydbp, pg, pp, cookie) DB_ENV *dbenv; + DB *dummydbp; db_pgno_t pg; void *pp; DBT *cookie; @@ -45,16 +46,16 @@ __ham_pgin(dbenv, pg, pp, cookie) * initialize the rest of the page and return. */ if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) { - P_INIT(pp, pginfo->db_pagesize, + P_INIT(pp, (db_indx_t)pginfo->db_pagesize, pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); return (0); } - if (!pginfo->needswap) + if (!F_ISSET(pginfo, DB_AM_SWAP)) return (0); return (h->type == P_HASHMETA ? __ham_mswap(pp) : - __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1)); + __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1)); } /* @@ -62,11 +63,12 @@ __ham_pgin(dbenv, pg, pp, cookie) * Convert host-specific page layout to the host-independent format * stored on disk. * - * PUBLIC: int __ham_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); + * PUBLIC: int __ham_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); */ int -__ham_pgout(dbenv, pg, pp, cookie) +__ham_pgout(dbenv, dummydbp, pg, pp, cookie) DB_ENV *dbenv; + DB *dummydbp; db_pgno_t pg; void *pp; DBT *cookie; @@ -75,12 +77,12 @@ __ham_pgout(dbenv, pg, pp, cookie) PAGE *h; pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) + if (!F_ISSET(pginfo, DB_AM_SWAP)) return (0); h = pp; return (h->type == P_HASHMETA ? __ham_mswap(pp) : - __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0)); + __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0)); } /* @@ -108,5 +110,7 @@ __ham_mswap(pg) SWAP32(p); /* h_charkey */ for (i = 0; i < NCACHED; ++i) SWAP32(p); /* spares */ + p += 59 * sizeof(u_int32_t); /* unusued */ + SWAP32(p); /* crypto_magic */ return (0); } diff --git a/bdb/hash/hash_dup.c b/bdb/hash/hash_dup.c index f5fbf4f472f..ec70e519d54 100644 --- a/bdb/hash/hash_dup.c +++ b/bdb/hash/hash_dup.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -38,20 +38,14 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_dup.c,v 11.49 2000/12/21 21:54:35 margo Exp $"; +static const char revid[] = "$Id: hash_dup.c,v 11.76 2002/08/06 05:34:40 bostic Exp $"; #endif /* not lint */ /* * PACKAGE: hashing * * DESCRIPTION: - * Manipulation of duplicates for the hash package. - * - * ROUTINES: - * - * External - * __add_dup - * Internal + * Manipulation of duplicates for the hash package. */ #ifndef NO_SYSTEM_INCLUDES @@ -61,13 +55,15 @@ static const char revid[] = "$Id: hash_dup.c,v 11.49 2000/12/21 21:54:35 margo E #endif #include "db_int.h" -#include "db_page.h" -#include "hash.h" -#include "btree.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/btree.h" +static int __ham_c_chgpg __P((DBC *, + db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); static int __ham_check_move __P((DBC *, u_int32_t)); static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t)); +static int __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); /* * Called from hash_access to add a duplicate key. 
nval is the new @@ -92,13 +88,15 @@ __ham_add_dup(dbc, nval, flags, pgnop) db_pgno_t *pgnop; { DB *dbp; - HASH_CURSOR *hcp; DBT pval, tmp_val; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; u_int32_t add_bytes, new_size; int cmp, ret; u_int8_t *hk; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; DB_ASSERT(flags != DB_CURRENT); @@ -117,12 +115,12 @@ __ham_add_dup(dbc, nval, flags, pgnop) * hcp->dndx is the first free ndx or the index of the * current pointer into the duplicate set. */ - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); /* Add the len bytes to the current singleton. */ if (HPAGE_PTYPE(hk) != H_DUPLICATE) add_bytes += DUP_SIZE(0); new_size = - LEN_HKEYDATA(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + + LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + add_bytes; /* @@ -132,7 +130,7 @@ __ham_add_dup(dbc, nval, flags, pgnop) */ if (HPAGE_PTYPE(hk) != H_OFFDUP && (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || - add_bytes > P_FREESPACE(hcp->page))) { + add_bytes > P_FREESPACE(dbp, hcp->page))) { if ((ret = __ham_dup_convert(dbc)) != 0) return (ret); @@ -145,14 +143,14 @@ __ham_add_dup(dbc, nval, flags, pgnop) if (HPAGE_PTYPE(hk) != H_DUPLICATE) { pval.flags = 0; pval.data = HKEYDATA_DATA(hk); - pval.size = LEN_HDATA(hcp->page, dbp->pgsize, + pval.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); if ((ret = __ham_make_dup(dbp->dbenv, - &pval, &tmp_val, &dbc->rdata.data, - &dbc->rdata.ulen)) != 0 || (ret = + &pval, &tmp_val, &dbc->my_rdata.data, + &dbc->my_rdata.ulen)) != 0 || (ret = __ham_replpair(dbc, &tmp_val, 1)) != 0) return (ret); - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); HPAGE_PTYPE(hk) = H_DUPLICATE; /* @@ -167,7 +165,7 @@ __ham_add_dup(dbc, nval, flags, pgnop) /* Now make the new entry a duplicate. */ if ((ret = __ham_make_dup(dbp->dbenv, nval, - &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + &tmp_val, &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) return (ret); tmp_val.dlen = 0; @@ -176,13 +174,14 @@ __ham_add_dup(dbc, nval, flags, pgnop) case DB_KEYLAST: case DB_NODUPDATA: if (dbp->dup_compare != NULL) { - __ham_dsearch(dbc, nval, &tmp_val.doff, &cmp); + __ham_dsearch(dbc, + nval, &tmp_val.doff, &cmp, flags); /* dup dups are not supported w/ sorted dups */ if (cmp == 0) return (__db_duperr(dbp, flags)); } else { - hcp->dup_tlen = LEN_HDATA(hcp->page, + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); hcp->dup_len = nval->size; F_SET(hcp, H_ISDUP); @@ -203,8 +202,7 @@ __ham_add_dup(dbc, nval, flags, pgnop) /* Add the duplicate. */ ret = __ham_replpair(dbc, &tmp_val, 0); if (ret == 0) - ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); - + ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY); if (ret != 0) return (ret); @@ -213,12 +211,12 @@ __ham_add_dup(dbc, nval, flags, pgnop) case DB_AFTER: hcp->dup_off += DUP_SIZE(hcp->dup_len); hcp->dup_len = nval->size; - hcp->dup_tlen += DUP_SIZE(nval->size); + hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); break; case DB_KEYFIRST: case DB_KEYLAST: case DB_BEFORE: - hcp->dup_tlen += DUP_SIZE(nval->size); + hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); hcp->dup_len = nval->size; break; } @@ -230,8 +228,8 @@ __ham_add_dup(dbc, nval, flags, pgnop) * If we get here, then we're on duplicate pages; set pgnop and * return so the common code can handle it. 
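Before any of the code above runs, __ham_add_dup has to decide whether the grown duplicate set can stay on the page at all: the item grows by the added bytes (plus a fixed per-element overhead when a singleton is first converted), and the set moves off-page when the result is "big" or the extra bytes exceed the page's free space. The sketch below restates that decision with simplified accounting; must_convert and the literal thresholds are invented, while ISBIG and P_FREESPACE supply the real ones.

#include <stdio.h>

/* Per-element overhead, in the spirit of DUP_SIZE(0). */
#define DUP_OVERHEAD (2 * sizeof(unsigned short))

/* Nonzero if the duplicate set must be converted to off-page duplicates. */
static int
must_convert(size_t item_len, size_t add_len, int already_dup,
    size_t big_threshold, size_t page_free)
{
    size_t add_bytes = add_len + (already_dup ? 0 : DUP_OVERHEAD);
    size_t new_size = item_len + add_bytes;

    return (new_size > big_threshold || add_bytes > page_free);
}

int
main(void)
{
    /* Small addition, small item, plenty of room: stays on the page. */
    printf("%d\n", must_convert(40, 20, 1, 1024, 500));
    /* Addition that pushes the item past the "big" threshold. */
    printf("%d\n", must_convert(900, 300, 1, 1024, 2000));
    return (0);
}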
*/ - memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), sizeof(db_pgno_t)); + memcpy(pgnop, HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), + sizeof(db_pgno_t)); return (ret); } @@ -245,19 +243,21 @@ int __ham_dup_convert(dbc) DBC *dbc; { + BOVERFLOW bo; DB *dbp; DBC **hcs; + DBT dbt; DB_LSN lsn; - PAGE *dp; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; - BOVERFLOW bo; - DBT dbt; HOFFPAGE ho; + PAGE *dp; db_indx_t i, len, off; int c, ret, t_ret; u_int8_t *p, *pend; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; /* @@ -274,24 +274,24 @@ __ham_dup_convert(dbc) */ if ((ret = __ham_get_clist(dbp, PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0) - return (ret); + goto err; /* * Now put the duplicates onto the new page. */ dbt.flags = 0; - switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) { + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { case H_KEYDATA: /* Simple case, one key on page; move it to dup page. */ - dbt.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); - dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + dbt.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); + dbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); ret = __db_pitem(dbc, dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt); goto finish; case H_OFFPAGE: /* Simple case, one key on page; move it to dup page. */ - memcpy(&ho, - P_ENTRY(hcp->page, H_DATAINDEX(hcp->indx)), HOFFPAGE_SIZE); + memcpy(&ho, P_ENTRY(dbp, hcp->page, H_DATAINDEX(hcp->indx)), + HOFFPAGE_SIZE); UMRW_SET(bo.unused1); B_TSET(bo.type, ho.type, 0); UMRW_SET(bo.unused2); @@ -301,17 +301,15 @@ __ham_dup_convert(dbc) dbt.data = &bo; ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL); - finish: if (ret == 0) { - memp_fset(dbp->mpf, dp, DB_MPOOL_DIRTY); - /* - * Update any other cursors - */ - if (hcs != NULL && DB_LOGGING(dbc) - && IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = __ham_chgpg_log(dbp->dbenv, - dbc->txn, &lsn, 0, dbp->log_fileid, - DB_HAM_DUP, PGNO(hcp->page), + if ((ret = mpf->set(mpf, dp, DB_MPOOL_DIRTY)) != 0) + break; + + /* Update any other cursors. */ + if (hcs != NULL && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = __ham_chgpg_log(dbp, dbc->txn, + &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), PGNO(dp), hcp->indx, 0)) != 0) break; } @@ -319,14 +317,12 @@ finish: if (ret == 0) { if ((ret = __ham_dcursor(hcs[c], PGNO(dp), 0)) != 0) break; - } break; - case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); pend = p + - LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); /* * We need to maintain the duplicate cursor position. 
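The dup_off/dup_tlen bookkeeping in these hunks, and the dsearch loop later in this file, all walk the same packed on-page format: each element of an H_DUPLICATE item is stored as a two-byte length, the bytes themselves, and the length repeated, so the next element lies len + 2 * sizeof(length) further on, which is what DUP_SIZE measures. A stand-alone walk of a hand-built buffer in that shape is below; toy_indx_t and put_dup are invented for the example.

#include <stdio.h>
#include <string.h>

typedef unsigned short toy_indx_t;  /* plays the role of db_indx_t */

/* Append one element: length, data, length again.  Returns bytes used. */
static size_t
put_dup(unsigned char *p, const char *s)
{
    toy_indx_t len = (toy_indx_t)strlen(s);

    memcpy(p, &len, sizeof(len));
    memcpy(p + sizeof(len), s, len);
    memcpy(p + sizeof(len) + len, &len, sizeof(len));
    return (sizeof(len) + len + sizeof(len));
}

int
main(void)
{
    unsigned char buf[64], *data;
    size_t tlen = 0, off;
    toy_indx_t len;

    tlen += put_dup(buf + tlen, "apple");
    tlen += put_dup(buf + tlen, "cherry");

    /* Same stepping as the traversal loops in the surrounding hunks. */
    for (off = 0; off < tlen; off += len + 2 * sizeof(toy_indx_t)) {
        data = buf + off;
        memcpy(&len, data, sizeof(len));
        printf("offset %2lu: %u bytes: %.*s\n", (unsigned long)off,
            (unsigned)len, (int)len, data + sizeof(len));
    }
    return (0);
}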
@@ -344,39 +340,48 @@ finish: if (ret == 0) { if ((ret = __db_pitem(dbc, dp, i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0) break; - /* - * Update any other cursors - */ + + /* Update any other cursors */ + if (hcs != NULL && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { + if ((ret = __ham_chgpg_log(dbp, dbc->txn, + &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), + PGNO(dp), hcp->indx, i)) != 0) + break; + } for (c = 0; hcs != NULL && hcs[c] != NULL; c++) if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off == off && (ret = __ham_dcursor(hcs[c], PGNO(dp), i)) != 0) - goto out; + goto err; off += len + 2 * sizeof(db_indx_t); } -out: break; - + break; default: - ret = __db_pgfmt(dbp, (u_long)hcp->pgno); + ret = __db_pgfmt(dbp->dbenv, (u_long)hcp->pgno); break; } - if (ret == 0) { - /* - * Now attach this to the source page in place of - * the old duplicate item. - */ - __ham_move_offpage(dbc, hcp->page, + + /* + * Now attach this to the source page in place of the old duplicate + * item. + */ + if (ret == 0) + ret = __ham_move_offpage(dbc, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp)); - ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); - if ((t_ret = memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY)) != 0) - ret = t_ret; +err: if (ret == 0) + ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY); + + if ((t_ret = + mpf->put(mpf, dp, ret == 0 ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) + ret = t_ret; + + if (ret == 0) hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0; - } else - (void)__db_free(dbc, dp); if (hcs != NULL) - __os_free(hcs, 0); + __os_free(dbp->dbenv, hcs); return (ret); } @@ -444,9 +449,10 @@ __ham_check_move(dbc, add_len) u_int32_t add_len; { DB *dbp; - HASH_CURSOR *hcp; DBT k, d; DB_LSN new_lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; PAGE *next_pagep; db_pgno_t next_pgno; u_int32_t new_datalen, old_len, rectype; @@ -454,9 +460,10 @@ __ham_check_move(dbc, add_len) int ret; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); /* * If the item is already off page duplicates or an offpage item, @@ -465,7 +472,7 @@ __ham_check_move(dbc, add_len) if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE) return (0); - old_len = LEN_HITEM(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); + old_len = LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; if (HPAGE_PTYPE(hk) != H_DUPLICATE) new_datalen += DUP_SIZE(0); @@ -479,10 +486,10 @@ __ham_check_move(dbc, add_len) * If neither of these is true, then we can return. */ if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE || - HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->page))) + HOFFDUP_SIZE - old_len <= P_FREESPACE(dbp, hcp->page))) return (0); - if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(hcp->page)) + if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(dbp, hcp->page)) return (0); /* @@ -494,20 +501,20 @@ __ham_check_move(dbc, add_len) new_datalen = ISBIG(hcp, new_datalen) ? 
HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); - new_datalen += LEN_HITEM(hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx)); + new_datalen += LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx)); next_pagep = NULL; for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID; next_pgno = NEXT_PGNO(next_pagep)) { if (next_pagep != NULL && - (ret = memp_fput(dbp->mpf, next_pagep, 0)) != 0) + (ret = mpf->put(mpf, next_pagep, 0)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, + if ((ret = mpf->get(mpf, &next_pgno, DB_MPOOL_CREATE, &next_pagep)) != 0) return (ret); - if (P_FREESPACE(next_pagep) >= new_datalen) + if (P_FREESPACE(dbp, next_pagep) >= new_datalen) break; } @@ -517,58 +524,58 @@ __ham_check_move(dbc, add_len) return (ret); /* Add new page at the end of the chain. */ - if (P_FREESPACE(next_pagep) < new_datalen && (ret = + if (P_FREESPACE(dbp, next_pagep) < new_datalen && (ret = __ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) { - (void)memp_fput(dbp->mpf, next_pagep, 0); + (void)mpf->put(mpf, next_pagep, 0); return (ret); } /* Copy the item to the new page. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { rectype = PUTPAIR; k.flags = 0; d.flags = 0; if (HPAGE_PTYPE( - H_PAIRKEY(hcp->page, hcp->indx)) == H_OFFPAGE) { + H_PAIRKEY(dbp, hcp->page, hcp->indx)) == H_OFFPAGE) { rectype |= PAIR_KEYMASK; - k.data = H_PAIRKEY(hcp->page, hcp->indx); + k.data = H_PAIRKEY(dbp, hcp->page, hcp->indx); k.size = HOFFPAGE_SIZE; } else { k.data = - HKEYDATA_DATA(H_PAIRKEY(hcp->page, hcp->indx)); - k.size = LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx); + HKEYDATA_DATA(H_PAIRKEY(dbp, hcp->page, hcp->indx)); + k.size = + LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx); } if (HPAGE_PTYPE(hk) == H_OFFPAGE) { rectype |= PAIR_DATAMASK; - d.data = H_PAIRDATA(hcp->page, hcp->indx); + d.data = H_PAIRDATA(dbp, hcp->page, hcp->indx); d.size = HOFFPAGE_SIZE; } else { - if (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx)) + if (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx)) == H_DUPLICATE) rectype |= PAIR_DUPMASK; d.data = - HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); - d.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); + d.size = LEN_HDATA(dbp, hcp->page, + dbp->pgsize, hcp->indx); } - if ((ret = __ham_insdel_log(dbp->dbenv, - dbc->txn, &new_lsn, 0, rectype, - dbp->log_fileid, PGNO(next_pagep), + if ((ret = __ham_insdel_log(dbp, + dbc->txn, &new_lsn, 0, rectype, PGNO(next_pagep), (u_int32_t)NUM_ENT(next_pagep), &LSN(next_pagep), &k, &d)) != 0) { - (void)memp_fput(dbp->mpf, next_pagep, 0); + (void)mpf->put(mpf, next_pagep, 0); return (ret); } + } else + LSN_NOT_LOGGED(new_lsn); - /* Move lsn onto page. */ - LSN(next_pagep) = new_lsn; /* Structure assignment. */ - } + /* Move lsn onto page. */ + LSN(next_pagep) = new_lsn; /* Structure assignment. */ - __ham_copy_item(dbp->pgsize, - hcp->page, H_KEYINDEX(hcp->indx), next_pagep); - __ham_copy_item(dbp->pgsize, - hcp->page, H_DATAINDEX(hcp->indx), next_pagep); + __ham_copy_item(dbp, hcp->page, H_KEYINDEX(hcp->indx), next_pagep); + __ham_copy_item(dbp, hcp->page, H_DATAINDEX(hcp->indx), next_pagep); /* * We've just manually inserted a key and set of data onto @@ -581,7 +588,7 @@ __ham_check_move(dbc, add_len) * Note that __ham_del_pair should dirty the page we're moving * the items from, so we need only dirty the new page ourselves. 
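The search that __ham_check_move performs above has a simple shape: follow the bucket's overflow chain, stop at the first page whose free space can hold the relocated pair, and add a fresh overflow page at the end of the chain if none can. A toy version of that walk follows, with an index-linked array replacing the mpool get/put calls; the struct and function names are invented.

#include <stdio.h>

struct toy_page {
    int next;           /* index of the next chained page, -1 ends the chain */
    size_t free_space;  /* what P_FREESPACE() would report */
};

/* First chained page with room for "need" bytes, or -1 if none has room. */
static int
find_room(const struct toy_page *pages, int first, size_t need)
{
    int i;

    for (i = first; i != -1; i = pages[i].next)
        if (pages[i].free_space >= need)
            return (i);
    return (-1);        /* caller adds a new overflow page */
}

int
main(void)
{
    struct toy_page chain[] = {
        { 1, 40 },      /* first overflow page, nearly full */
        { -1, 900 },    /* second overflow page, lots of room */
    };
    int i = find_room(chain, 0, 200);

    if (i < 0)
        printf("no room: add a new overflow page\n");
    else
        printf("move the pair to chained page %d\n", i);
    return (0);
}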
*/ - if ((ret = memp_fset(dbp->mpf, next_pagep, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, next_pagep, DB_MPOOL_DIRTY)) != 0) goto out; /* Update all cursors that used to point to this item. */ @@ -596,12 +603,17 @@ __ham_check_move(dbc, add_len) * __ham_del_pair decremented nelem. This is incorrect; we * manually copied the element elsewhere, so the total number * of elements hasn't changed. Increment it again. + * + * !!! + * Note that we still have the metadata page pinned, and + * __ham_del_pair dirtied it, so we don't need to set the dirty + * flag again. */ if (!STD_LOCKING(dbc)) hcp->hdr->nelem++; out: - (void)memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, hcp->page, DB_MPOOL_DIRTY); hcp->page = next_pagep; hcp->pgno = PGNO(hcp->page); hcp->indx = NUM_ENT(hcp->page) - 2; @@ -620,9 +632,8 @@ out: * This is really just a special case of __onpage_replace; we should * probably combine them. * - * PUBLIC: void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); */ -void +static int __ham_move_offpage(dbc, pagep, ndx, pgno) DBC *dbc; PAGE *pagep; @@ -630,48 +641,51 @@ __ham_move_offpage(dbc, pagep, ndx, pgno) db_pgno_t pgno; { DB *dbp; - HASH_CURSOR *hcp; DBT new_dbt; DBT old_dbt; HOFFDUP od; - db_indx_t i; + db_indx_t i, *inp; int32_t shrink; u_int8_t *src; + int ret; dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; od.type = H_OFFDUP; UMRW_SET(od.unused[0]); UMRW_SET(od.unused[1]); UMRW_SET(od.unused[2]); od.pgno = pgno; + ret = 0; - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { new_dbt.data = &od; new_dbt.size = HOFFDUP_SIZE; - old_dbt.data = P_ENTRY(pagep, ndx); - old_dbt.size = LEN_HITEM(pagep, dbp->pgsize, ndx); - (void)__ham_replace_log(dbp->dbenv, - dbc->txn, &LSN(pagep), 0, dbp->log_fileid, + old_dbt.data = P_ENTRY(dbp, pagep, ndx); + old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx); + if ((ret = __ham_replace_log(dbp, dbc->txn, &LSN(pagep), 0, PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, - &old_dbt, &new_dbt, 0); - } + &old_dbt, &new_dbt, 0)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(pagep)); - shrink = LEN_HITEM(pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE; + shrink = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE; + inp = P_INP(dbp, pagep); if (shrink != 0) { /* Copy data. */ src = (u_int8_t *)(pagep) + HOFFSET(pagep); - memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep)); + memmove(src + shrink, src, inp[ndx] - HOFFSET(pagep)); HOFFSET(pagep) += shrink; /* Update index table. */ for (i = ndx; i < NUM_ENT(pagep); i++) - pagep->inp[i] += shrink; + inp[i] += shrink; } /* Now copy the offdup entry onto the page. */ - memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE); + memcpy(P_ENTRY(dbp, pagep, ndx), &od, HOFFDUP_SIZE); + return (ret); } /* @@ -679,13 +693,14 @@ __ham_move_offpage(dbc, pagep, ndx, pgno) * Locate a particular duplicate in a duplicate set. Make sure that * we exit with the cursor set appropriately. * - * PUBLIC: void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *)); + * PUBLIC: void __ham_dsearch + * PUBLIC: __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t)); */ void -__ham_dsearch(dbc, dbt, offp, cmpp) +__ham_dsearch(dbc, dbt, offp, cmpp, flags) DBC *dbc; DBT *dbt; - u_int32_t *offp; + u_int32_t *offp, flags; int *cmpp; { DB *dbp; @@ -697,25 +712,36 @@ __ham_dsearch(dbc, dbt, offp, cmpp) dbp = dbc->dbp; hcp = (HASH_CURSOR *)dbc->internal; - if (dbp->dup_compare == NULL) - func = __bam_defcmp; - else - func = dbp->dup_compare; + func = dbp->dup_compare == NULL ? 
__bam_defcmp : dbp->dup_compare; i = F_ISSET(hcp, H_CONTINUE) ? hcp->dup_off: 0; - data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) + i; - hcp->dup_tlen = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + i; + hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); while (i < hcp->dup_tlen) { memcpy(&len, data, sizeof(db_indx_t)); data += sizeof(db_indx_t); cur.data = data; cur.size = (u_int32_t)len; + + /* + * If we find an exact match, we're done. If in a sorted + * duplicate set and the item is larger than our test item, + * we're done. In the latter case, if permitting partial + * matches, it's not a failure. + */ *cmpp = func(dbp, dbt, &cur); - if (*cmpp == 0 || (*cmpp < 0 && dbp->dup_compare != NULL)) + if (*cmpp == 0) + break; + if (*cmpp < 0 && dbp->dup_compare != NULL) { + if (flags == DB_GET_BOTH_RANGE) + *cmpp = 0; break; + } + i += len + 2 * sizeof(db_indx_t); data += len + sizeof(db_indx_t); } + *offp = i; hcp->dup_off = i; hcp->dup_len = len; @@ -727,29 +753,22 @@ __ham_dsearch(dbc, dbt, offp, cmpp) * __ham_cprint -- * Display the current cursor list. * - * PUBLIC: int __ham_cprint __P((DB *)); + * PUBLIC: void __ham_cprint __P((DBC *)); */ -int -__ham_cprint(dbp) - DB *dbp; +void +__ham_cprint(dbc) + DBC *dbc; { HASH_CURSOR *cp; - DBC *dbc; - MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp); - for (dbc = TAILQ_FIRST(&dbp->active_queue); - dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { - cp = (HASH_CURSOR *)dbc->internal; - fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu", - P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno, - (u_long)cp->indx); - if (F_ISSET(cp, H_DELETED)) - fprintf(stderr, " (deleted)"); - fprintf(stderr, "\n"); - } - MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + cp = (HASH_CURSOR *)dbc->internal; - return (0); + fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu", + P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno, + (u_long)cp->indx); + if (F_ISSET(cp, H_DELETED)) + fprintf(stderr, " (deleted)"); + fprintf(stderr, "\n"); } #endif /* DEBUG */ @@ -765,17 +784,17 @@ __ham_dcursor(dbc, pgno, indx) u_int32_t indx; { DB *dbp; - DBC *dbc_nopd; HASH_CURSOR *hcp; BTREE_CURSOR *dcp; int ret; dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __db_c_newopd(dbc, pgno, &dbc_nopd)) != 0) + if ((ret = __db_c_newopd(dbc, pgno, hcp->opd, &hcp->opd)) != 0) return (ret); - dcp = (BTREE_CURSOR *)dbc_nopd->internal; + dcp = (BTREE_CURSOR *)hcp->opd->internal; dcp->pgno = pgno; dcp->indx = indx; @@ -792,14 +811,81 @@ __ham_dcursor(dbc, pgno, indx) * Transfer the deleted flag from the top-level cursor to the * created one. */ - hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_DELETED)) { F_SET(dcp, C_DELETED); F_CLR(hcp, H_DELETED); } - /* Stack the cursors and reset the initial cursor's index. */ - hcp->opd = dbc_nopd; + return (0); +} + +/* + * __ham_c_chgpg -- + * Adjust the cursors after moving an item to a new page. We only + * move cursors that are pointing at this one item and are not + * deleted; since we only touch non-deleted cursors, and since + * (by definition) no item existed at the pgno/indx we're moving the + * item to, we're guaranteed that all the cursors we affect here or + * on abort really do refer to this one item. 
+ */ +static int +__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) + DBC *dbc; + db_pgno_t old_pgno, new_pgno; + u_int32_t old_index, new_index; +{ + DB *dbp, *ldbp; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *my_txn; + DBC *cp; + HASH_CURSOR *hcp; + int found, ret; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + found = 0; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + + hcp = (HASH_CURSOR *)cp->internal; + /* + * If a cursor is deleted, it doesn't refer to this + * item--it just happens to have the same indx, but + * it points to a former neighbor. Don't move it. + */ + if (F_ISSET(hcp, H_DELETED)) + continue; + + if (hcp->pgno == old_pgno) { + if (hcp->indx == old_index) { + hcp->pgno = new_pgno; + hcp->indx = new_index; + } else + continue; + if (my_txn != NULL && cp->txn != my_txn) + found = 1; + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (found != 0 && DBC_LOGGING(dbc)) { + if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, DB_HAM_CHGPG, + old_pgno, new_pgno, old_index, new_index)) != 0) + return (ret); + } return (0); } diff --git a/bdb/hash/hash_func.c b/bdb/hash/hash_func.c index 22b4f08ee70..c6cc2ad4460 100644 --- a/bdb/hash/hash_func.c +++ b/bdb/hash/hash_func.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_func.c,v 11.7 2000/08/16 18:26:19 ubell Exp $"; +static const char revid[] = "$Id: hash_func.c,v 11.12 2002/03/28 19:49:42 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -51,8 +51,6 @@ static const char revid[] = "$Id: hash_func.c,v 11.7 2000/08/16 18:26:19 ubell E #endif #include "db_int.h" -#include "db_page.h" -#include "hash.h" /* * __ham_func2 -- @@ -230,6 +228,11 @@ __ham_func5(dbp, key, len) return (h); } +/* + * __ham_test -- + * + * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); + */ u_int32_t __ham_test(dbp, key, len) DB *dbp; diff --git a/bdb/hash/hash_meta.c b/bdb/hash/hash_meta.c index d96a6db3207..9f224454869 100644 --- a/bdb/hash/hash_meta.c +++ b/bdb/hash/hash_meta.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_meta.c,v 11.10 2000/12/21 21:54:35 margo Exp $"; +static const char revid[] = "$Id: hash_meta.c,v 11.19 2002/06/03 14:22:15 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,11 +16,10 @@ static const char revid[] = "$Id: hash_meta.c,v 11.10 2000/12/21 21:54:35 margo #endif #include "db_int.h" -#include "db_page.h" -#include "hash.h" -#include "db_shash.h" -#include "lock.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" /* * Acquire the meta-data page. 
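The hunk below follows a pattern worth calling out: the meta-data page lock is taken first, the page is then pinned through the mpool file handle, and if pinning fails the lock is dropped before returning so neither resource leaks. A skeletal version of that ordering is sketched here with stub functions; toy_lock, toy_unlock and toy_pin are invented and merely mark where lock_get, lock_put and the mpool get would sit.

#include <stdio.h>

static int toy_lock(int *held)   { *held = 1; return (0); }
static int toy_unlock(int *held) { *held = 0; return (0); }
static int toy_pin(int fail)     { return (fail ? -1 : 0); }

/* Lock, then pin; on pin failure release the lock before returning. */
static int
get_meta(int pin_fails)
{
    int held = 0, ret;

    if ((ret = toy_lock(&held)) != 0)
        return (ret);
    if ((ret = toy_pin(pin_fails)) != 0 && held)
        (void)toy_unlock(&held);
    return (ret);
}

int
main(void)
{
    printf("success path: %d\n", get_meta(0));
    printf("failure path: %d (lock released)\n", get_meta(1));
    return (0);
}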
@@ -31,30 +30,32 @@ int __ham_get_meta(dbc) DBC *dbc; { - HASH_CURSOR *hcp; - HASH *hashp; DB *dbp; + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + HASH *hashp; + HASH_CURSOR *hcp; int ret; - hcp = (HASH_CURSOR *)dbc->internal; dbp = dbc->dbp; + dbenv = dbp->dbenv; + mpf = dbp->mpf; hashp = dbp->h_internal; + hcp = (HASH_CURSOR *)dbc->internal; - if (dbp->dbenv != NULL && - STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) { + if (dbenv != NULL && + STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) { dbc->lock.pgno = hashp->meta_pgno; - if ((ret = lock_get(dbp->dbenv, dbc->locker, + if ((ret = dbenv->lock_get(dbenv, dbc->locker, DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, &dbc->lock_dbt, DB_LOCK_READ, &hcp->hlock)) != 0) return (ret); } - if ((ret = memp_fget(dbc->dbp->mpf, + if ((ret = mpf->get(mpf, &hashp->meta_pgno, DB_MPOOL_CREATE, &(hcp->hdr))) != 0 && - hcp->hlock.off != LOCK_INVALID) { - (void)lock_put(dbc->dbp->dbenv, &hcp->hlock); - hcp->hlock.off = LOCK_INVALID; - } + LOCK_ISSET(hcp->hlock)) + (void)dbenv->lock_put(dbenv, &hcp->hlock); return (ret); } @@ -68,18 +69,19 @@ int __ham_release_meta(dbc) DBC *dbc; { + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; + mpf = dbc->dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; if (hcp->hdr) - (void)memp_fput(dbc->dbp->mpf, hcp->hdr, + (void)mpf->put(mpf, hcp->hdr, F_ISSET(hcp, H_DIRTY) ? DB_MPOOL_DIRTY : 0); hcp->hdr = NULL; - if (!F_ISSET(dbc, DBC_RECOVER) && - dbc->txn == NULL && hcp->hlock.off != LOCK_INVALID) - (void)lock_put(dbc->dbp->dbenv, &hcp->hlock); - hcp->hlock.off = LOCK_INVALID; + if (!F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE) && + dbc->txn == NULL && LOCK_ISSET(hcp->hlock)) + (void)dbc->dbp->dbenv->lock_put(dbc->dbp->dbenv, &hcp->hlock); F_CLR(hcp, H_DIRTY); return (0); @@ -95,6 +97,7 @@ __ham_dirty_meta(dbc) DBC *dbc; { DB *dbp; + DB_ENV *dbenv; DB_LOCK _tmp; HASH *hashp; HASH_CURSOR *hcp; @@ -105,12 +108,13 @@ __ham_dirty_meta(dbc) hcp = (HASH_CURSOR *)dbc->internal; ret = 0; - if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) { + if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) { + dbenv = dbp->dbenv; dbc->lock.pgno = hashp->meta_pgno; - if ((ret = lock_get(dbp->dbenv, dbc->locker, + if ((ret = dbenv->lock_get(dbenv, dbc->locker, DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, &dbc->lock_dbt, DB_LOCK_WRITE, &_tmp)) == 0) { - ret = lock_put(dbp->dbenv, &hcp->hlock); + ret = dbenv->lock_put(dbenv, &hcp->hlock); hcp->hlock = _tmp; } } diff --git a/bdb/hash/hash_method.c b/bdb/hash/hash_method.c index f8239993dc5..9a6bf59536a 100644 --- a/bdb/hash/hash_method.c +++ b/bdb/hash/hash_method.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_method.c,v 11.7 2000/07/04 18:28:23 bostic Exp $"; +static const char revid[] = "$Id: hash_method.c,v 11.12 2002/03/27 04:32:12 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,8 +16,8 @@ static const char revid[] = "$Id: hash_method.c,v 11.7 2000/07/04 18:28:23 bosti #endif #include "db_int.h" -#include "db_page.h" -#include "hash.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" static int __ham_set_h_ffactor __P((DB *, u_int32_t)); static int __ham_set_h_hash @@ -38,7 +38,7 @@ __ham_db_create(dbp) int ret; if ((ret = __os_malloc(dbp->dbenv, - sizeof(HASH), NULL, &dbp->h_internal)) != 0) + sizeof(HASH), &dbp->h_internal)) != 0) return (ret); hashp = dbp->h_internal; @@ -63,7 +63,7 @@ __ham_db_close(dbp) { if (dbp->h_internal == NULL) return (0); - __os_free(dbp->h_internal, sizeof(HASH)); + __os_free(dbp->dbenv, dbp->h_internal); dbp->h_internal = NULL; return (0); } diff --git a/bdb/hash/hash_open.c b/bdb/hash/hash_open.c new file mode 100644 index 00000000000..041a1df1e7b --- /dev/null +++ b/bdb/hash/hash_open.c @@ -0,0 +1,558 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2002 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash_open.c,v 11.175 2002/09/04 19:06:44 margo Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/log.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/fop.h" + +static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *)); + +/* + * __ham_open -- + * + * PUBLIC: int __ham_open __P((DB *, + * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t)); + */ +int +__ham_open(dbp, txn, name, base_pgno, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + DB_ENV *dbenv; + DBC *dbc; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; + HASH *hashp; + int ret, t_ret; + + COMPQUIET(name, NULL); + dbenv = dbp->dbenv; + dbc = NULL; + mpf = dbp->mpf; + + /* Initialize the remaining fields/methods of the DB. */ + dbp->stat = __ham_stat; + + /* + * Get a cursor. If DB_CREATE is specified, we may be creating + * pages, and to do that safely in CDB we need a write cursor. + * In STD_LOCKING mode, we'll synchronize using the meta page + * lock instead. + */ + if ((ret = dbp->cursor(dbp, + txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? + DB_WRITECURSOR : 0)) != 0) + return (ret); + + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + hashp->meta_pgno = base_pgno; + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + /* Initialize the hdr structure. */ + if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { + /* File exists, verify the data in the header. */ + if (hashp->h_hash == NULL) + hashp->h_hash = hcp->hdr->dbmeta.version < 5 + ? __ham_func4 : __ham_func5; + if (!F_ISSET(dbp, DB_AM_RDONLY) && !IS_RECOVERING(dbenv) && + hashp->h_hash(dbp, + CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) { + __db_err(dbp->dbenv, + "hash: incompatible hash function"); + ret = EINVAL; + goto err2; + } + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) + F_SET(dbp, DB_AM_DUPSORT); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + + /* We must initialize last_pgno, it could be stale. */ + if (!F_ISSET(dbp, DB_AM_RDONLY) && + dbp->meta_pgno == PGNO_BASE_MD) { + if ((ret = __ham_dirty_meta(dbc)) != 0) + goto err2; + mpf->last_pgno(mpf, &hcp->hdr->dbmeta.last_pgno); + } + } else if (!IS_RECOVERING(dbenv) && !F_ISSET(dbp, DB_AM_RECOVER)) + DB_ASSERT(0); + +err2: /* Release the meta data page */ + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __ham_metachk -- + * + * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); + */ +int +__ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA *hashm; +{ + DB_ENV *dbenv; + u_int32_t vers; + int ret; + + dbenv = dbp->dbenv; + + /* + * At this point, all we know is that the magic number is for a Hash. + * Check the version, the database may be out of date. 
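The version check that follows has two steps: the stored version number is byte-swapped when the file was written with the opposite byte order, and the result is then mapped onto "needs upgrade", "supported" or "unknown". The sketch below mirrors that flow; swap32 stands in for M_32_SWAP and the integer return codes are invented, not DB error values.

#include <stdio.h>
#include <stdint.h>

/* Reverse the byte order of a 32-bit value, like M_32_SWAP. */
static uint32_t
swap32(uint32_t v)
{
    return ((v >> 24) | ((v >> 8) & 0xff00) |
        ((v << 8) & 0xff0000) | (v << 24));
}

/* 1 = needs upgrade, 0 = supported, -1 = unknown version. */
static int
check_hash_version(uint32_t stored, int needswap)
{
    uint32_t vers = needswap ? swap32(stored) : stored;

    switch (vers) {
    case 4:
    case 5:
    case 6:
        return (1);
    case 7:
    case 8:
        return (0);
    default:
        return (-1);
    }
}

int
main(void)
{
    /* Version 8 as written by a machine of the opposite endianness. */
    printf("%d\n", check_hash_version(0x08000000, 1));
    printf("%d\n", check_hash_version(6, 0));
    return (0);
}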
+ */ + vers = hashm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 4: + case 5: + case 6: + __db_err(dbenv, + "%s: hash version %lu requires a version upgrade", + name, (u_long)vers); + return (DB_OLD_VERSION); + case 7: + case 8: + break; + default: + __db_err(dbenv, + "%s: unsupported hash version: %lu", name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) + return (ret); + + /* Check the type. */ + if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) + return (EINVAL); + dbp->type = DB_HASH; + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + /* + * Check application info against metadata info, and set info, flags, + * and type based on metadata info. + */ + if ((ret = __db_fchk(dbenv, + "DB->open", hashm->dbmeta.flags, + DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) + return (ret); + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + else + if (F_ISSET(dbp, DB_AM_DUP)) { + __db_err(dbenv, + "%s: DB_DUP specified to open method but not set in database", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + else + if (F_ISSET(dbp, DB_AM_SUBDB)) { + __db_err(dbenv, + "%s: multiple databases specified but not supported in file", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { + if (dbp->dup_compare == NULL) + dbp->dup_compare = __bam_defcmp; + } else + if (dbp->dup_compare != NULL) { + __db_err(dbenv, + "%s: duplicate sort function specified but not set in database", + name); + return (EINVAL); + } + + /* Set the page size. */ + dbp->pgsize = hashm->dbmeta.pagesize; + + /* Copy the file's ID. */ + memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); +} + +/* + * __ham_init_meta -- + * + * Initialize a hash meta-data page. We assume that the meta-data page is + * contiguous with the initial buckets that we create. If that turns out + * to be false, we'll fix it up later. Return the initial number of buckets + * allocated. + */ +static db_pgno_t +__ham_init_meta(dbp, meta, pgno, lsnp) + DB *dbp; + HMETA *meta; + db_pgno_t pgno; + DB_LSN *lsnp; +{ + HASH *hashp; + db_pgno_t nbuckets; + int i; + int32_t l2; + + hashp = dbp->h_internal; + if (hashp->h_hash == NULL) + hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; + + if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) { + hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1; + l2 = __db_log2(hashp->h_nelem > 2 ? 
hashp->h_nelem : 2); + } else + l2 = 1; + nbuckets = (db_pgno_t)(1 << l2); + + memset(meta, 0, sizeof(HMETA)); + meta->dbmeta.lsn = *lsnp; + meta->dbmeta.pgno = pgno; + meta->dbmeta.magic = DB_HASHMAGIC; + meta->dbmeta.version = DB_HASHVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = + ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg; + DB_ASSERT(meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_HASHMETA; + meta->dbmeta.free = PGNO_INVALID; + meta->dbmeta.last_pgno = pgno; + meta->max_bucket = nbuckets - 1; + meta->high_mask = nbuckets - 1; + meta->low_mask = (nbuckets >> 1) - 1; + meta->ffactor = hashp->h_ffactor; + meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&meta->dbmeta, DB_HASH_DUP); + if (F_ISSET(dbp, DB_AM_SUBDB)) + F_SET(&meta->dbmeta, DB_HASH_SUBDB); + if (dbp->dup_compare != NULL) + F_SET(&meta->dbmeta, DB_HASH_DUPSORT); + + /* + * Create the first and second buckets pages so that we have the + * page numbers for them and we can store that page number in the + * meta-data header (spares[0]). + */ + meta->spares[0] = pgno + 1; + + /* Fill in the last fields of the meta data page. */ + for (i = 1; i <= l2; i++) + meta->spares[i] = meta->spares[0]; + for (; i < NCACHED; i++) + meta->spares[i] = PGNO_INVALID; + + return (nbuckets); +} + +/* + * __ham_new_file -- + * Create the necessary pages to begin a new database file. If name + * is NULL, then this is an unnamed file, the mpf has been set in the dbp + * and we simply create the pages using mpool. In this case, we don't log + * because we never have to redo an unnamed create and the undo simply + * frees resources. + * + * This code appears more complex than it is because of the two cases (named + * and unnamed). The way to read the code is that for each page being created, + * there are three parts: 1) a "get page" chunk (which either uses malloc'd + * memory or calls mpf->get), 2) the initialization, and 3) the "put page" + * chunk which either does a fop write or an mpf->put. + * + * PUBLIC: int __ham_new_file __P((DB *, DB_TXN *, DB_FH *, const char *)); + */ +int +__ham_new_file(dbp, txn, fhp, name) + DB *dbp; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + DB_ENV *dbenv; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + DBT pdbt; + HMETA *meta; + PAGE *page; + int ret; + db_pgno_t lpgno; + void *buf; + + dbenv = dbp->dbenv; + mpf = dbp->mpf; + meta = NULL; + page = NULL; + memset(&pdbt, 0, sizeof(pdbt)); + + /* Build meta-data page. 
*/ + if (name == NULL) { + lpgno = PGNO_BASE_MD; + ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &meta); + } else { + pginfo.db_pagesize = dbp->pgsize; + pginfo.type = dbp->type; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf); + meta = (HMETA *)buf; + } + if (ret != 0) + return (ret); + + LSN_NOT_LOGGED(lsn); + lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->dbmeta.last_pgno = lpgno; + + if (name == NULL) + ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY); + else { + if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + ret = __fop_write(dbenv, txn, name, + DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1); + } + if (ret != 0) + goto err; + meta = NULL; + + /* Now allocate the final hash bucket. */ + if (name == NULL) { + if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &page)) != 0) + goto err; + } else { +#ifdef DIAGNOSTIC + memset(buf, dbp->pgsize, 0); +#endif + page = (PAGE *)buf; + } + + P_INIT(page, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN_NOT_LOGGED(page->lsn); + + if (name == NULL) + ret = mpf->put(mpf, page, DB_MPOOL_DIRTY); + else { + if ((ret = __db_pgout(dbenv, lpgno, buf, &pdbt)) != 0) + goto err; + ret = __fop_write(dbenv, txn, name, + DB_APP_DATA, fhp, lpgno * dbp->pgsize, buf, dbp->pgsize, 1); + } + if (ret != 0) + goto err; + page = NULL; + +err: if (name != NULL) + __os_free(dbenv, buf); + else { + if (meta != NULL) + (void)mpf->put(mpf, meta, 0); + if (page != NULL) + (void)mpf->put(mpf, page, 0); + } + return (ret); +} + +/* + * __ham_new_subdb -- + * Create the necessary pages to begin a new subdatabase. + * + * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_TXN *)); + */ +int +__ham_new_subdb(mdbp, dbp, txn) + DB *mdbp, *dbp; + DB_TXN *txn; +{ + DBC *dbc; + DB_ENV *dbenv; + DB_LOCK metalock, mmlock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DBMETA *mmeta; + HMETA *meta; + PAGE *h; + int i, ret, t_ret; + db_pgno_t lpgno, mpgno; + + dbenv = mdbp->dbenv; + mpf = mdbp->mpf; + dbc = NULL; + meta = NULL; + mmeta = NULL; + LOCK_INIT(metalock); + LOCK_INIT(mmlock); + + if ((ret = mdbp->cursor(mdbp, txn, + &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0) + return (ret); + + /* Get and lock the new meta data page. */ + if ((ret = __db_lget(dbc, + 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0) + goto err; + + /* Initialize the new meta-data page. */ + lsn = meta->dbmeta.lsn; + lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn); + + /* + * We are about to allocate a set of contiguous buckets (lpgno + * worth). We need to get the master meta-data page to figure + * out where these pages are and to allocate them. So, lock and + * get the master meta data page. + */ + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0) + goto err; + if ((ret = mpf->get(mpf, &mpgno, 0, &mmeta)) != 0) + goto err; + + /* + * Now update the hash meta-data page to reflect where the first + * set of buckets are actually located. + */ + meta->spares[0] = mmeta->last_pgno + 1; + for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++) + meta->spares[i] = meta->spares[0]; + + /* The new meta data page is now complete; log it. */ + if ((ret = __db_log_page(mdbp, + txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) + goto err; + + /* Reflect the group allocation. 
*/ + if (DBENV_LOGGING(dbenv)) + if ((ret = __ham_groupalloc_log(mdbp, txn, + &LSN(mmeta), 0, &LSN(mmeta), + meta->spares[0], meta->max_bucket + 1, mmeta->free)) != 0) + goto err; + + /* Release the new meta-data page. */ + if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0) + goto err; + meta = NULL; + + mmeta->last_pgno +=lpgno; + lpgno = mmeta->last_pgno; + + /* Now allocate the final hash bucket. */ + if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &h)) != 0) + goto err; + P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN(h) = LSN(mmeta); + if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0) + goto err; + + /* Now put the master-metadata page back. */ + if ((ret = mpf->put(mpf, mmeta, DB_MPOOL_DIRTY)) != 0) + goto err; + mmeta = NULL; + +err: + if (mmeta != NULL) + if ((t_ret = mpf->put(mpf, mmeta, 0)) != 0 && ret == 0) + ret = t_ret; + if (LOCK_ISSET(mmlock)) + if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0) + ret = t_ret; + if (meta != NULL) + if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0) + ret = t_ret; + if (LOCK_ISSET(metalock)) + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL) + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} diff --git a/bdb/hash/hash_page.c b/bdb/hash/hash_page.c index 64f38853284..6788129773f 100644 --- a/bdb/hash/hash_page.c +++ b/bdb/hash/hash_page.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,23 +43,14 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_page.c,v 11.46 2001/01/11 18:19:51 bostic Exp $"; +static const char revid[] = "$Id: hash_page.c,v 11.87 2002/08/15 02:46:20 bostic Exp $"; #endif /* not lint */ /* * PACKAGE: hashing * * DESCRIPTION: - * Page manipulation for hashing package. - * - * ROUTINES: - * - * External - * __get_page - * __add_ovflpage - * __overflow_page - * Internal - * open_temp + * Page manipulation for hashing package. */ #ifndef NO_SYSTEM_INCLUDES @@ -69,11 +60,13 @@ static const char revid[] = "$Id: hash_page.c,v 11.46 2001/01/11 18:19:51 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "hash.h" -#include "lock.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/hash.h" +#include "dbinc/lock.h" + +static int __ham_c_delpg + __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *)); /* * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); @@ -104,15 +97,15 @@ __ham_item(dbc, mode, pgnop) recheck: /* Check if we are looking for space in which to insert an item. */ - if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID - && hcp->seek_size < P_FREESPACE(hcp->page)) + if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID && + hcp->seek_size < P_FREESPACE(dbp, hcp->page)) hcp->seek_found_page = hcp->pgno; /* Check for off-page duplicates. */ if (hcp->indx < NUM_ENT(hcp->page) && - HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { + HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), + HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), sizeof(db_pgno_t)); F_SET(hcp, H_OK); return (0); @@ -126,7 +119,7 @@ recheck: * pointer to be the beginning of the datum. 
*/ memcpy(&hcp->dup_len, - HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) + + HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + hcp->dup_off, sizeof(db_indx_t)); if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) { @@ -153,15 +146,18 @@ int __ham_item_reset(dbc) DBC *dbc; { - HASH_CURSOR *hcp; DB *dbp; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; int ret; - ret = 0; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; + + ret = 0; if (hcp->page != NULL) - ret = memp_fput(dbp->mpf, hcp->page, 0); + ret = mpf->put(mpf, hcp->page, 0); __ham_item_init(dbc); return (ret); @@ -181,8 +177,7 @@ __ham_item_init(dbc) * If this cursor still holds any locks, we must * release them if we are not running with transactions. */ - if (hcp->lock.off != LOCK_INVALID && dbc->txn == NULL) - (void)lock_put(dbc->dbp->dbenv, &hcp->lock); + (void)__TLPUT(dbc, hcp->lock); /* * The following fields must *not* be initialized here @@ -191,7 +186,7 @@ __ham_item_init(dbc) */ hcp->bucket = BUCKET_INVALID; hcp->lbucket = BUCKET_INVALID; - hcp->lock.off = LOCK_INVALID; + LOCK_INIT(hcp->lock); hcp->lock_mode = DB_LOCK_NG; hcp->dup_off = 0; hcp->dup_len = 0; @@ -269,8 +264,9 @@ __ham_item_prev(dbc, mode, pgnop) db_pgno_t next_pgno; int ret; - dbp = dbc->dbp; hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + /* * There are 5 cases for backing up in a hash file. * Case 1: In the middle of a page, no duplicates, just dec the index. @@ -291,9 +287,10 @@ __ham_item_prev(dbc, mode, pgnop) * to handle backing up through keys. */ if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) { - if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { + if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == + H_OFFDUP) { memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), + HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), sizeof(db_pgno_t)); F_SET(hcp, H_OK); return (0); @@ -302,7 +299,7 @@ __ham_item_prev(dbc, mode, pgnop) /* Duplicates are on-page. */ if (hcp->dup_off != 0) { memcpy(&hcp->dup_len, HKEYDATA_DATA( - H_PAIRDATA(hcp->page, hcp->indx)) + H_PAIRDATA(dbp, hcp->page, hcp->indx)) + hcp->dup_off - sizeof(db_indx_t), sizeof(db_indx_t)); hcp->dup_off -= @@ -396,7 +393,7 @@ __ham_item_next(dbc, mode, pgnop) if (F_ISSET(hcp, H_DELETED)) { if (hcp->indx != NDX_INVALID && F_ISSET(hcp, H_ISDUP) && - HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) + HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) { if (F_ISSET(hcp, H_DUPONLY)) { F_CLR(hcp, H_OK); @@ -447,7 +444,7 @@ __ham_item_next(dbc, mode, pgnop) } /* - * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int)); + * PUBLIC: void __ham_putitem __P((DB *, PAGE *p, const DBT *, int)); * * This is a little bit sleazy in that we're overloading the meaning * of the H_OFFPAGE type here. When we recover deletes, we have the @@ -456,24 +453,27 @@ __ham_item_next(dbc, mode, pgnop) * an H_KEYDATA around it. */ void -__ham_putitem(p, dbt, type) +__ham_putitem(dbp, p, dbt, type) + DB *dbp; PAGE *p; const DBT *dbt; int type; { u_int16_t n, off; + db_indx_t *inp; n = NUM_ENT(p); + inp = P_INP(dbp, p); /* Put the item element on the page. 
*/ if (type == H_OFFPAGE) { off = HOFFSET(p) - dbt->size; - HOFFSET(p) = p->inp[n] = off; - memcpy(P_ENTRY(p, n), dbt->data, dbt->size); + HOFFSET(p) = inp[n] = off; + memcpy(P_ENTRY(dbp, p, n), dbt->data, dbt->size); } else { off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size); - HOFFSET(p) = p->inp[n] = off; - PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type); + HOFFSET(p) = inp[n] = off; + PUT_HKEYDATA(P_ENTRY(dbp, p, n), dbt->data, dbt->size, type); } /* Adjust page info. */ @@ -481,8 +481,8 @@ __ham_putitem(p, dbt, type) } /* - * PUBLIC: void __ham_reputpair - * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); + * PUBLIC: void __ham_reputpair __P((DB *, PAGE *, + * PUBLIC: u_int32_t, const DBT *, const DBT *)); * * This is a special case to restore a key/data pair to its original * location during recovery. We are guaranteed that the pair fits @@ -490,17 +490,21 @@ __ham_putitem(p, dbt, type) * the last pair, the normal insert works). */ void -__ham_reputpair(p, psize, ndx, key, data) +__ham_reputpair(dbp, p, ndx, key, data) + DB *dbp; PAGE *p; - u_int32_t psize, ndx; + u_int32_t ndx; const DBT *key, *data; { - db_indx_t i, movebytes, newbytes; + db_indx_t i, *inp, movebytes, newbytes; + size_t psize; u_int8_t *from; + psize = dbp->pgsize; + inp = P_INP(dbp, p); /* First shuffle the existing items up on the page. */ - movebytes = - (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p); + movebytes = (db_indx_t)( + (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p)); newbytes = key->size + data->size; from = (u_int8_t *)p + HOFFSET(p); memmove(from - newbytes, from, movebytes); @@ -511,17 +515,17 @@ __ham_reputpair(p, psize, ndx, key, data) * we are dealing with index 0 (db_indx_t's are unsigned). */ for (i = NUM_ENT(p) - 1; ; i-- ) { - p->inp[i + 2] = p->inp[i] - newbytes; + inp[i + 2] = inp[i] - newbytes; if (i == H_KEYINDEX(ndx)) break; } /* Put the key and data on the page. */ - p->inp[H_KEYINDEX(ndx)] = - (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - key->size; - p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size; - memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size); - memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size); + inp[H_KEYINDEX(ndx)] = (db_indx_t)( + (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - key->size); + inp[H_DATAINDEX(ndx)] = inp[H_KEYINDEX(ndx)] - data->size; + memcpy(P_ENTRY(dbp, p, H_KEYINDEX(ndx)), key->data, key->size); + memcpy(P_ENTRY(dbp, p, H_DATAINDEX(ndx)), data->data, data->size); /* Adjust page info. */ HOFFSET(p) -= newbytes; @@ -537,25 +541,25 @@ __ham_del_pair(dbc, reclaim_page) int reclaim_page; { DB *dbp; - HASH_CURSOR *hcp; DBT data_dbt, key_dbt; - DB_ENV *dbenv; DB_LSN new_lsn, *n_lsn, tmp_lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; PAGE *n_pagep, *nn_pagep, *p, *p_pagep; + db_ham_mode op; db_indx_t ndx; db_pgno_t chg_pgno, pgno, tmp_pgno; int ret, t_ret; + u_int32_t order; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; - - dbenv = dbp->dbenv; - ndx = hcp->indx; - n_pagep = p_pagep = nn_pagep = NULL; + ndx = hcp->indx; - if (hcp->page == NULL && (ret = memp_fget(dbp->mpf, - &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) + if (hcp->page == NULL && + (ret = mpf->get(mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) return (ret); p = hcp->page; @@ -567,17 +571,17 @@ __ham_del_pair(dbc, reclaim_page) * entry referring to the big item. 
*/ ret = 0; - if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) { - memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))), + if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) { + memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))), sizeof(db_pgno_t)); ret = __db_doff(dbc, pgno); } if (ret == 0) - switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) { + switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) { case H_OFFPAGE: memcpy(&pgno, - HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))), + HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))), sizeof(db_pgno_t)); ret = __db_doff(dbc, pgno); break; @@ -596,21 +600,21 @@ __ham_del_pair(dbc, reclaim_page) return (ret); /* Now log the delete off this page. */ - if (DB_LOGGING(dbc)) { - key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx)); - key_dbt.size = LEN_HITEM(p, dbp->pgsize, H_KEYINDEX(ndx)); - data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx)); - data_dbt.size = LEN_HITEM(p, dbp->pgsize, H_DATAINDEX(ndx)); - - if ((ret = __ham_insdel_log(dbenv, - dbc->txn, &new_lsn, 0, DELPAIR, - dbp->log_fileid, PGNO(p), (u_int32_t)ndx, + if (DBC_LOGGING(dbc)) { + key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx)); + key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx)); + data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx)); + data_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx)); + + if ((ret = __ham_insdel_log(dbp, + dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx, &LSN(p), &key_dbt, &data_dbt)) != 0) return (ret); + } else + LSN_NOT_LOGGED(new_lsn); - /* Move lsn onto page. */ - LSN(p) = new_lsn; - } + /* Move lsn onto page. */ + LSN(p) = new_lsn; /* Do the delete. */ __ham_dpair(dbp, p, ndx); @@ -636,8 +640,11 @@ __ham_del_pair(dbc, reclaim_page) * XXX * Perhaps we can retain incremental numbers and apply them later. */ - if (!STD_LOCKING(dbc)) + if (!STD_LOCKING(dbc)) { --hcp->hdr->nelem; + if ((ret = __ham_dirty_meta(dbc)) != 0) + return (ret); + } /* * If we need to reclaim the page, then check if the page is empty. @@ -650,43 +657,43 @@ __ham_del_pair(dbc, reclaim_page) if (!reclaim_page || NUM_ENT(p) != 0 || (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID)) - return (memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY)); + return (mpf->set(mpf, p, DB_MPOOL_DIRTY)); if (PREV_PGNO(p) == PGNO_INVALID) { /* * First page in chain is empty and we know that there * are more pages in the chain. */ - if ((ret = - memp_fget(dbp->mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) + if ((ret = mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) return (ret); - if (NEXT_PGNO(n_pagep) != PGNO_INVALID && - (ret = memp_fget(dbp->mpf, &NEXT_PGNO(n_pagep), 0, - &nn_pagep)) != 0) + if (NEXT_PGNO(n_pagep) != PGNO_INVALID && (ret = + mpf->get(mpf, &NEXT_PGNO(n_pagep), 0, &nn_pagep)) != 0) goto err; - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { key_dbt.data = n_pagep; key_dbt.size = dbp->pgsize; - if ((ret = __ham_copypage_log(dbenv, - dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p), + if ((ret = __ham_copypage_log(dbp, + dbc->txn, &new_lsn, 0, PGNO(p), &LSN(p), PGNO(n_pagep), &LSN(n_pagep), NEXT_PGNO(n_pagep), nn_pagep == NULL ? NULL : &LSN(nn_pagep), &key_dbt)) != 0) goto err; + } else + LSN_NOT_LOGGED(new_lsn); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; /* Structure assignment. */ + LSN(n_pagep) = new_lsn; + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) + LSN(nn_pagep) = new_lsn; - /* Move lsn onto page. */ - LSN(p) = new_lsn; /* Structure assignment. 
*/ - LSN(n_pagep) = new_lsn; - if (NEXT_PGNO(n_pagep) != PGNO_INVALID) - LSN(nn_pagep) = new_lsn; - } if (nn_pagep != NULL) { PREV_PGNO(nn_pagep) = PGNO(p); - if ((ret = memp_fput(dbp->mpf, - nn_pagep, DB_MPOOL_DIRTY)) != 0) { + if ((ret = + mpf->put(mpf, nn_pagep, DB_MPOOL_DIRTY)) != 0) { nn_pagep = NULL; goto err; } @@ -703,26 +710,30 @@ __ham_del_pair(dbc, reclaim_page) * Update cursors to reflect the fact that records * on the second page have moved to the first page. */ - if ((ret = __ham_c_chgpg(dbc, - PGNO(n_pagep), NDX_INVALID, PGNO(p), NDX_INVALID)) != 0) - return (ret); + if ((ret = __ham_c_delpg(dbc, PGNO(n_pagep), + PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0) + goto err; /* * Update the cursor to reflect its new position. */ hcp->indx = 0; hcp->pgno = PGNO(p); - if ((ret = memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY)) != 0 || - (ret = __db_free(dbc, n_pagep)) != 0) - return (ret); + hcp->order += order; + + if ((ret = mpf->set(mpf, p, DB_MPOOL_DIRTY)) != 0) + goto err; + if ((ret = __db_free(dbc, n_pagep)) != 0) { + n_pagep = NULL; + goto err; + } } else { - if ((ret = - memp_fget(dbp->mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0) + if ((ret = mpf->get(mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0) goto err; if (NEXT_PGNO(p) != PGNO_INVALID) { - if ((ret = memp_fget(dbp->mpf, - &NEXT_PGNO(p), 0, &n_pagep)) != 0) + if ((ret = + mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) goto err; n_lsn = &LSN(n_pagep); } else { @@ -734,32 +745,40 @@ __ham_del_pair(dbc, reclaim_page) if (n_pagep != NULL) PREV_PGNO(n_pagep) = PGNO(p_pagep); - if (DB_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbenv, - dbc->txn, &new_lsn, 0, DELOVFL, - dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep), + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_newpage_log(dbp, dbc->txn, + &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep), PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) goto err; + } else + LSN_NOT_LOGGED(new_lsn); + + /* Move lsn onto page. */ + LSN(p_pagep) = new_lsn; /* Structure assignment. */ + if (n_pagep) + LSN(n_pagep) = new_lsn; + LSN(p) = new_lsn; - /* Move lsn onto page. */ - LSN(p_pagep) = new_lsn; /* Structure assignment. */ - if (n_pagep) - LSN(n_pagep) = new_lsn; - LSN(p) = new_lsn; - } if (NEXT_PGNO(p) == PGNO_INVALID) { /* * There is no next page; put the cursor on the * previous page as if we'd deleted the last item - * on that page; index greater than number of - * valid entries and H_DELETED set. + * on that page, with index after the last valid + * entry. + * + * The deleted flag was set up above. */ hcp->pgno = PGNO(p_pagep); hcp->indx = NUM_ENT(p_pagep); - F_SET(hcp, H_DELETED); + op = DB_HAM_DELLASTPG; } else { + /* + * There is a next page, so put the cursor at + * the beginning of it. + */ hcp->pgno = NEXT_PGNO(p); hcp->indx = 0; + op = DB_HAM_DELMIDPG; } /* @@ -770,26 +789,28 @@ __ham_del_pair(dbc, reclaim_page) hcp->page = NULL; chg_pgno = PGNO(p); ret = __db_free(dbc, p); - if ((t_ret = memp_fput(dbp->mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 - && ret == 0) + if ((t_ret = + mpf->put(mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; - if (n_pagep != NULL && (t_ret = memp_fput(dbp->mpf, - n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) + if (n_pagep != NULL && (t_ret = + mpf->put(mpf, n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; if (ret != 0) return (ret); - ret = __ham_c_chgpg(dbc, - chg_pgno, 0, hcp->pgno, hcp->indx); + if ((ret = __ham_c_delpg(dbc, + chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0) + return (ret); + hcp->order += order; } return (ret); err: /* Clean up any pages. 
*/ if (n_pagep != NULL) - (void)memp_fput(dbp->mpf, n_pagep, 0); + (void)mpf->put(mpf, n_pagep, 0); if (nn_pagep != NULL) - (void)memp_fput(dbp->mpf, nn_pagep, 0); + (void)mpf->put(mpf, nn_pagep, 0); if (p_pagep != NULL) - (void)memp_fput(dbp->mpf, p_pagep, 0); + (void)mpf->put(mpf, p_pagep, 0); return (ret); } @@ -807,12 +828,13 @@ __ham_replpair(dbc, dbt, make_dup) u_int32_t make_dup; { DB *dbp; - HASH_CURSOR *hcp; DBT old_dbt, tdata, tmp; + DB_ENV *dbenv; DB_LSN new_lsn; + HASH_CURSOR *hcp; int32_t change; /* XXX: Possible overflow. */ - u_int32_t dup, len, memsize; - int is_big, ret, type; + u_int32_t dup_flag, len, memsize; + int beyond_eor, is_big, ret, type; u_int8_t *beg, *dest, *end, *hk, *src; void *memp; @@ -828,6 +850,7 @@ __ham_replpair(dbc, dbt, make_dup) * add. */ dbp = dbc->dbp; + dbenv = dbp->dbenv; hcp = (HASH_CURSOR *)dbc->internal; /* @@ -841,19 +864,21 @@ __ham_replpair(dbc, dbt, make_dup) */ change = dbt->size - dbt->dlen; - hk = H_PAIRDATA(hcp->page, hcp->indx); + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); is_big = HPAGE_PTYPE(hk) == H_OFFPAGE; if (is_big) memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); else - len = LEN_HKEYDATA(hcp->page, + len = LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); - if (dbt->doff + dbt->dlen > len) + beyond_eor = dbt->doff + dbt->dlen > len; + if (beyond_eor) change += dbt->doff + dbt->dlen - len; - if (change > (int32_t)P_FREESPACE(hcp->page) || is_big) { + if (change > (int32_t)P_FREESPACE(dbp, hcp->page) || + beyond_eor || is_big) { /* * Case 3 -- two subcases. * A. This is not really a partial operation, but an overwrite. @@ -868,16 +893,16 @@ __ham_replpair(dbc, dbt, make_dup) memset(&tmp, 0, sizeof(tmp)); if ((ret = __db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx), - &tmp, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + &tmp, &dbc->rkey->data, &dbc->rkey->ulen)) != 0) return (ret); /* Preserve duplicate info. */ - dup = F_ISSET(hcp, H_ISDUP); + dup_flag = F_ISSET(hcp, H_ISDUP); if (dbt->doff == 0 && dbt->dlen == len) { ret = __ham_del_pair(dbc, 0); if (ret == 0) ret = __ham_add_el(dbc, - &tmp, dbt, dup ? H_DUPLICATE : H_KEYDATA); + &tmp, dbt, dup_flag ? H_DUPLICATE : H_KEYDATA); } else { /* Case B */ type = HPAGE_PTYPE(hk) != H_OFFPAGE ? HPAGE_PTYPE(hk) : H_KEYDATA; @@ -891,15 +916,14 @@ __ham_replpair(dbc, dbt, make_dup) /* Now we can delete the item. */ if ((ret = __ham_del_pair(dbc, 0)) != 0) { - __os_free(memp, memsize); + __os_free(dbenv, memp); goto err; } /* Now shift old data around to make room for new. */ if (change > 0) { - if ((ret = __os_realloc(dbp->dbenv, - tdata.size + change, - NULL, &tdata.data)) != 0) + if ((ret = __os_realloc(dbenv, + tdata.size + change, &tdata.data)) != 0) return (ret); memp = tdata.data; memsize = tdata.size + change; @@ -920,9 +944,9 @@ __ham_replpair(dbc, dbt, make_dup) /* Now add the pair. */ ret = __ham_add_el(dbc, &tmp, &tdata, type); - __os_free(memp, memsize); + __os_free(dbenv, memp); } - F_SET(hcp, dup); + F_SET(hcp, dup_flag); err: return (ret); } @@ -930,7 +954,7 @@ err: return (ret); * Set up pointer into existing data. Do it before the log * message so we can use it inside of the log setup. */ - beg = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); beg += dbt->doff; /* @@ -938,20 +962,22 @@ err: return (ret); * all the parameters here. Then log the call before moving * anything around. 
*/ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { old_dbt.data = beg; old_dbt.size = dbt->dlen; - if ((ret = __ham_replace_log(dbp->dbenv, - dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->page), + if ((ret = __ham_replace_log(dbp, + dbc->txn, &new_lsn, 0, PGNO(hcp->page), (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page), (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) return (ret); - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - } + } else + LSN_NOT_LOGGED(new_lsn); + + LSN(hcp->page) = new_lsn; /* Structure assignment. */ - __ham_onpage_replace(hcp->page, dbp->pgsize, - (u_int32_t)H_DATAINDEX(hcp->indx), (int32_t)dbt->doff, change, dbt); + __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx), + (int32_t)dbt->doff, change, dbt); return (0); } @@ -967,34 +993,41 @@ err: return (ret); * off: Offset at which we are beginning the replacement. * change: the number of bytes (+ or -) that the element is growing/shrinking. * dbt: the new data that gets written at beg. - * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, - * PUBLIC: int32_t, DBT *)); + * + * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t, + * PUBLIC: int32_t, int32_t, DBT *)); */ void -__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt) +__ham_onpage_replace(dbp, pagep, ndx, off, change, dbt) + DB *dbp; PAGE *pagep; - size_t pgsize; u_int32_t ndx; int32_t off; int32_t change; DBT *dbt; { - db_indx_t i; + db_indx_t i, *inp; int32_t len; + size_t pgsize; u_int8_t *src, *dest; int zero_me; + pgsize = dbp->pgsize; + inp = P_INP(dbp, pagep); if (change != 0) { zero_me = 0; src = (u_int8_t *)(pagep) + HOFFSET(pagep); if (off < 0) - len = pagep->inp[ndx] - HOFFSET(pagep); - else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) { - len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + - LEN_HKEYDATA(pagep, pgsize, ndx) - src; + len = inp[ndx] - HOFFSET(pagep); + else if ((u_int32_t)off >= + LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) { + len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src); zero_me = 1; } else - len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src; + len = (int32_t)( + (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) - + src); dest = src - change; memmove(dest, src, len); if (zero_me) @@ -1002,14 +1035,14 @@ __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt) /* Now update the indices. 
*/ for (i = ndx; i < NUM_ENT(pagep); i++) - pagep->inp[i] -= change; + inp[i] -= change; HOFFSET(pagep) -= change; } if (off >= 0) - memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off, + memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off, dbt->data, dbt->size); else - memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size); + memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size); } /* @@ -1022,10 +1055,12 @@ __ham_split_page(dbc, obucket, nbucket) { DB *dbp; DBC **carray; - HASH_CURSOR *hcp, *cp; DBT key, page_dbt; DB_ENV *dbenv; + DB_LOCK block; DB_LSN new_lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp, *cp; PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; db_indx_t n; db_pgno_t bucket_pgno, npgno, next_pgno; @@ -1034,22 +1069,24 @@ __ham_split_page(dbc, obucket, nbucket) void *big_buf; dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; dbenv = dbp->dbenv; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; temp_pagep = old_pagep = new_pagep = NULL; - - if ((ret = __ham_get_clist(dbp, obucket, NDX_INVALID, &carray)) != 0) - return (ret); + carray = NULL; + LOCK_INIT(block); bucket_pgno = BUCKET_TO_PAGE(hcp, obucket); - if ((ret = memp_fget(dbp->mpf, + if ((ret = __db_lget(dbc, + 0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0) + goto err; + if ((ret = mpf->get(mpf, &bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0) goto err; /* Properly initialize the new bucket page. */ npgno = BUCKET_TO_PAGE(hcp, nbucket); - if ((ret = memp_fget(dbp->mpf, - &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0) + if ((ret = mpf->get(mpf, &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0) goto err; P_INIT(new_pagep, dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); @@ -1057,33 +1094,35 @@ __ham_split_page(dbc, obucket, nbucket) temp_pagep = hcp->split_buf; memcpy(temp_pagep, old_pagep, dbp->pgsize); - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { page_dbt.size = dbp->pgsize; page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbenv, - dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD, + if ((ret = __ham_splitdata_log(dbp, + dbc->txn, &new_lsn, 0, SPLITOLD, PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) goto err; - } + } else + LSN_NOT_LOGGED(new_lsn); + + LSN(old_pagep) = new_lsn; /* Structure assignment. */ P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - if (DB_LOGGING(dbc)) - LSN(old_pagep) = new_lsn; /* Structure assignment. */ - big_len = 0; big_buf = NULL; key.flags = 0; while (temp_pagep != NULL) { + if ((ret = __ham_get_clist(dbp, + PGNO(temp_pagep), NDX_INVALID, &carray)) != 0) + goto err; + for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) { - if ((ret = - __db_ret(dbp, temp_pagep, H_KEYINDEX(n), - &key, &big_buf, &big_len)) != 0) + if ((ret = __db_ret(dbp, temp_pagep, + H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0) goto err; - if (__ham_call_hash(dbc, key.data, key.size) - == obucket) + if (__ham_call_hash(dbc, key.data, key.size) == obucket) pp = &old_pagep; else pp = &new_pagep; @@ -1092,25 +1131,24 @@ __ham_split_page(dbc, obucket, nbucket) * Figure out how many bytes we need on the new * page to store the key/data pair. 
*/ - - len = LEN_HITEM(temp_pagep, dbp->pgsize, + len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize, H_DATAINDEX(n)) + - LEN_HITEM(temp_pagep, dbp->pgsize, + LEN_HITEM(dbp, temp_pagep, dbp->pgsize, H_KEYINDEX(n)) + 2 * sizeof(db_indx_t); - if (P_FREESPACE(*pp) < len) { - if (DB_LOGGING(dbc)) { + if (P_FREESPACE(dbp, *pp) < len) { + if (DBC_LOGGING(dbc)) { page_dbt.size = dbp->pgsize; page_dbt.data = *pp; - if ((ret = __ham_splitdata_log( - dbenv, dbc->txn, - &new_lsn, 0, dbp->log_fileid, + if ((ret = __ham_splitdata_log(dbp, + dbc->txn, &new_lsn, 0, SPLITNEW, PGNO(*pp), &page_dbt, &LSN(*pp))) != 0) goto err; - LSN(*pp) = new_lsn; - } + } else + LSN_NOT_LOGGED(new_lsn); + LSN(*pp) = new_lsn; if ((ret = __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0) goto err; @@ -1122,28 +1160,25 @@ __ham_split_page(dbc, obucket, nbucket) for (i = 0; carray[i] != NULL; i++) { cp = (HASH_CURSOR *)carray[i]->internal; - if (cp->pgno == PGNO(temp_pagep) - && cp->indx == n) { + if (cp->pgno == PGNO(temp_pagep) && + cp->indx == n) { cp->pgno = PGNO(*pp); cp->indx = NUM_ENT(*pp); found = 1; } } - if (found && DB_LOGGING(dbc) - && IS_SUBTRANSACTION(dbc->txn)) { + if (found && DBC_LOGGING(dbc) && + IS_SUBTRANSACTION(dbc->txn)) { if ((ret = - __ham_chgpg_log(dbp->dbenv, + __ham_chgpg_log(dbp, dbc->txn, &new_lsn, 0, - dbp->log_fileid, DB_HAM_SPLIT, PGNO(temp_pagep), PGNO(*pp), n, NUM_ENT(*pp))) != 0) goto err; } } - __ham_copy_item(dbp->pgsize, - temp_pagep, H_KEYINDEX(n), *pp); - __ham_copy_item(dbp->pgsize, - temp_pagep, H_DATAINDEX(n), *pp); + __ham_copy_item(dbp, temp_pagep, H_KEYINDEX(n), *pp); + __ham_copy_item(dbp, temp_pagep, H_DATAINDEX(n), *pp); } next_pgno = NEXT_PGNO(temp_pagep); @@ -1156,23 +1191,30 @@ __ham_split_page(dbc, obucket, nbucket) if (next_pgno == PGNO_INVALID) temp_pagep = NULL; - else if ((ret = memp_fget(dbp->mpf, - &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0) + else if ((ret = mpf->get( + mpf, &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0) goto err; - if (temp_pagep != NULL && DB_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = temp_pagep; - if ((ret = __ham_splitdata_log(dbenv, - dbc->txn, &new_lsn, 0, dbp->log_fileid, - SPLITOLD, PGNO(temp_pagep), - &page_dbt, &LSN(temp_pagep))) != 0) - goto err; + if (temp_pagep != NULL) { + if (DBC_LOGGING(dbc)) { + page_dbt.size = dbp->pgsize; + page_dbt.data = temp_pagep; + if ((ret = __ham_splitdata_log(dbp, + dbc->txn, &new_lsn, 0, + SPLITOLD, PGNO(temp_pagep), + &page_dbt, &LSN(temp_pagep))) != 0) + goto err; + } else + LSN_NOT_LOGGED(new_lsn); LSN(temp_pagep) = new_lsn; } + + if (carray != NULL) /* We never knew its size. */ + __os_free(dbenv, carray); + carray = NULL; } if (big_buf != NULL) - __os_free(big_buf, big_len); + __os_free(dbenv, big_buf); /* * If the original bucket spanned multiple pages, then we've got @@ -1188,37 +1230,43 @@ __ham_split_page(dbc, obucket, nbucket) /* * Write new buckets out. 
*/ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { page_dbt.size = dbp->pgsize; page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0, - dbp->log_fileid, SPLITNEW, PGNO(old_pagep), &page_dbt, + if ((ret = __ham_splitdata_log(dbp, dbc->txn, + &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) goto err; LSN(old_pagep) = new_lsn; page_dbt.data = new_pagep; - if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0, - dbp->log_fileid, SPLITNEW, PGNO(new_pagep), &page_dbt, + if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0, + SPLITNEW, PGNO(new_pagep), &page_dbt, &LSN(new_pagep))) != 0) goto err; LSN(new_pagep) = new_lsn; + } else { + LSN_NOT_LOGGED(LSN(old_pagep)); + LSN_NOT_LOGGED(LSN(new_pagep)); } - ret = memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY); - if ((t_ret = memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 - && ret == 0) + + ret = mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY); + if ((t_ret = + mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; if (0) { err: if (old_pagep != NULL) - (void)memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY); if (new_pagep != NULL) - (void)memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY); if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) - (void)memp_fput(dbp->mpf, temp_pagep, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, temp_pagep, DB_MPOOL_DIRTY); } + if (LOCK_ISSET(block)) + __TLPUT(dbc, block); if (carray != NULL) /* We never knew its size. */ - __os_free(carray, 0); + __os_free(dbenv, carray); return (ret); } @@ -1237,11 +1285,12 @@ __ham_add_el(dbc, key, val, type) const DBT *key, *val; int type; { - DB *dbp; - HASH_CURSOR *hcp; const DBT *pkey, *pdata; + DB *dbp; DBT key_dbt, data_dbt; DB_LSN new_lsn; + DB_MPOOLFILE *mpf; + HASH_CURSOR *hcp; HOFFPAGE doff, koff; db_pgno_t next_pgno, pgno; u_int32_t data_size, key_size, pairsize, rectype; @@ -1249,13 +1298,14 @@ __ham_add_el(dbc, key, val, type) int key_type, data_type; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; do_expand = 0; - pgno = hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page : - hcp->pgno; - if (hcp->page == NULL && (ret = memp_fget(dbp->mpf, &pgno, - DB_MPOOL_CREATE, &hcp->page)) != 0) + pgno = hcp->seek_found_page != PGNO_INVALID ? + hcp->seek_found_page : hcp->pgno; + if (hcp->page == NULL && + (ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) return (ret); key_size = HKEYDATA_PSIZE(key->size); @@ -1276,21 +1326,20 @@ __ham_add_el(dbc, key, val, type) * anyway. Check if it's a bigpair that fits or a regular * pair that fits. */ - if (P_FREESPACE(hcp->page) >= pairsize) + if (P_FREESPACE(dbp, hcp->page) >= pairsize) break; next_pgno = NEXT_PGNO(hcp->page); - if ((ret = - __ham_next_cpage(dbc, next_pgno, 0)) != 0) + if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) return (ret); } /* * Check if we need to allocate a new page. 
*/ - if (P_FREESPACE(hcp->page) < pairsize) { + if (P_FREESPACE(dbp, hcp->page) < pairsize) { do_expand = 1; if ((ret = __ham_add_ovflpage(dbc, - (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0) + (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0) return (ret); hcp->pgno = PGNO(hcp->page); } @@ -1334,7 +1383,7 @@ __ham_add_el(dbc, key, val, type) data_type = type; } - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { rectype = PUTPAIR; if (is_databig) rectype |= PAIR_DATAMASK; @@ -1343,18 +1392,18 @@ __ham_add_el(dbc, key, val, type) if (type == H_DUPLICATE) rectype |= PAIR_DUPMASK; - if ((ret = __ham_insdel_log(dbp->dbenv, dbc->txn, &new_lsn, 0, - rectype, dbp->log_fileid, PGNO(hcp->page), - (u_int32_t)NUM_ENT(hcp->page), &LSN(hcp->page), pkey, - pdata)) != 0) + if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0, + rectype, PGNO(hcp->page), (u_int32_t)NUM_ENT(hcp->page), + &LSN(hcp->page), pkey, pdata)) != 0) return (ret); + } else + LSN_NOT_LOGGED(new_lsn); - /* Move lsn onto page. */ - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - } + /* Move lsn onto page. */ + LSN(hcp->page) = new_lsn; /* Structure assignment. */ - __ham_putitem(hcp->page, pkey, key_type); - __ham_putitem(hcp->page, pdata, data_type); + __ham_putitem(dbp, hcp->page, pkey, key_type); + __ham_putitem(dbp, hcp->page, pdata, data_type); /* * For splits, we are going to update item_info's page number @@ -1369,8 +1418,11 @@ __ham_add_el(dbc, key, val, type) * XXX * Maybe keep incremental numbers here. */ - if (!STD_LOCKING(dbc)) + if (!STD_LOCKING(dbc)) { hcp->hdr->nelem++; + if ((ret = __ham_dirty_meta(dbc)) != 0) + return (ret); + } if (do_expand || (hcp->hdr->ffactor != 0 && (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor)) @@ -1384,28 +1436,32 @@ __ham_add_el(dbc, key, val, type) * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we * do not need to do any logging here. * - * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *)); + * PUBLIC: void __ham_copy_item __P((DB *, PAGE *, u_int32_t, PAGE *)); */ void -__ham_copy_item(pgsize, src_page, src_ndx, dest_page) - size_t pgsize; +__ham_copy_item(dbp, src_page, src_ndx, dest_page) + DB *dbp; PAGE *src_page; u_int32_t src_ndx; PAGE *dest_page; { u_int32_t len; + size_t pgsize; void *src, *dest; + db_indx_t *inp; + pgsize = dbp->pgsize; + inp = P_INP(dbp, dest_page); /* * Copy the key and data entries onto this new page. */ - src = P_ENTRY(src_page, src_ndx); + src = P_ENTRY(dbp, src_page, src_ndx); /* Set up space on dest. 
*/ - len = LEN_HITEM(src_page, pgsize, src_ndx); + len = (u_int32_t)LEN_HITEM(dbp, src_page, pgsize, src_ndx); HOFFSET(dest_page) -= len; - dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); - dest = P_ENTRY(dest_page, NUM_ENT(dest_page)); + inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); + dest = P_ENTRY(dbp, dest_page, NUM_ENT(dest_page)); NUM_ENT(dest_page)++; memcpy(dest, src, len); @@ -1414,8 +1470,8 @@ __ham_copy_item(pgsize, src_page, src_ndx, dest_page) /* * * Returns: - * pointer on success - * NULL on error + * pointer on success + * NULL on error * * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); */ @@ -1427,31 +1483,33 @@ __ham_add_ovflpage(dbc, pagep, release, pp) PAGE **pp; { DB *dbp; - HASH_CURSOR *hcp; DB_LSN new_lsn; + DB_MPOOLFILE *mpf; PAGE *new_pagep; int ret; dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; + mpf = dbp->mpf; if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0) return (ret); - if (DB_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbp->dbenv, dbc->txn, &new_lsn, 0, - PUTOVFL, dbp->log_fileid, PGNO(pagep), &LSN(pagep), + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0, + PUTOVFL, PGNO(pagep), &LSN(pagep), PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) return (ret); + } else + LSN_NOT_LOGGED(new_lsn); - /* Move lsn onto page. */ - LSN(pagep) = LSN(new_pagep) = new_lsn; - } + /* Move lsn onto page. */ + LSN(pagep) = LSN(new_pagep) = new_lsn; NEXT_PGNO(pagep) = PGNO(new_pagep); + PREV_PGNO(new_pagep) = PGNO(pagep); if (release) - ret = memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY); + ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY); *pp = new_pagep; return (ret); @@ -1467,10 +1525,12 @@ __ham_get_cpage(dbc, mode) { DB *dbp; DB_LOCK tmp_lock; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; ret = 0; @@ -1485,25 +1545,22 @@ __ham_get_cpage(dbc, mode) * 4. If there is a lock, but it's for a different bucket, then we need * to release the existing lock and get a new lock. */ - tmp_lock.off = LOCK_INVALID; + LOCK_INIT(tmp_lock); if (STD_LOCKING(dbc)) { - if (hcp->lock.off != LOCK_INVALID && - hcp->lbucket != hcp->bucket) { /* Case 4 */ - if (dbc->txn == NULL && - (ret = lock_put(dbp->dbenv, &hcp->lock)) != 0) - return (ret); - hcp->lock.off = LOCK_INVALID; - } - if ((hcp->lock.off != LOCK_INVALID && + if (hcp->lbucket != hcp->bucket && /* Case 4 */ + (ret = __TLPUT(dbc, hcp->lock)) != 0) + return (ret); + + if ((LOCK_ISSET(hcp->lock) && (hcp->lock_mode == DB_LOCK_READ && mode == DB_LOCK_WRITE))) { /* Case 3. */ tmp_lock = hcp->lock; - hcp->lock.off = LOCK_INVALID; + LOCK_INIT(hcp->lock); } /* Acquire the lock. */ - if (hcp->lock.off == LOCK_INVALID) + if (!LOCK_ISSET(hcp->lock)) /* Cases 1, 3, and 4. */ if ((ret = __ham_lock_bucket(dbc, mode)) != 0) return (ret); @@ -1511,17 +1568,18 @@ __ham_get_cpage(dbc, mode) if (ret == 0) { hcp->lock_mode = mode; hcp->lbucket = hcp->bucket; - if (tmp_lock.off != LOCK_INVALID) + if (LOCK_ISSET(tmp_lock)) /* Case 3: release the original lock. 
*/ - ret = lock_put(dbp->dbenv, &tmp_lock); - } else if (tmp_lock.off != LOCK_INVALID) + ret = + dbp->dbenv->lock_put(dbp->dbenv, &tmp_lock); + } else if (LOCK_ISSET(tmp_lock)) hcp->lock = tmp_lock; } if (ret == 0 && hcp->page == NULL) { if (hcp->pgno == PGNO_INVALID) hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if ((ret = memp_fget(dbp->mpf, + if ((ret = mpf->get(mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) return (ret); } @@ -1543,18 +1601,21 @@ __ham_next_cpage(dbc, pgno, dirty) int dirty; { DB *dbp; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; PAGE *p; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; hcp = (HASH_CURSOR *)dbc->internal; - if (hcp->page != NULL && (ret = memp_fput(dbp->mpf, - hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0) + if (hcp->page != NULL && + (ret = mpf->put(mpf, hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0) return (ret); + hcp->page = NULL; - if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0) + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0) return (ret); hcp->page = p; @@ -1576,7 +1637,7 @@ __ham_lock_bucket(dbc, mode) db_lockmode_t mode; { HASH_CURSOR *hcp; - u_int32_t flags; + db_pgno_t pgno; int gotmeta, ret; hcp = (HASH_CURSOR *)dbc->internal; @@ -1584,17 +1645,12 @@ __ham_lock_bucket(dbc, mode) if (gotmeta) if ((ret = __ham_get_meta(dbc)) != 0) return (ret); - dbc->lock.pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); if (gotmeta) if ((ret = __ham_release_meta(dbc)) != 0) return (ret); - flags = 0; - if (DB_NONBLOCK(dbc)) - LF_SET(DB_LOCK_NOWAIT); - - ret = lock_get(dbc->dbp->dbenv, - dbc->locker, flags, &dbc->lock_dbt, mode, &hcp->lock); + ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock); hcp->lock_mode = mode; return (ret); @@ -1606,6 +1662,9 @@ __ham_lock_bucket(dbc, mode) * represents. The caller is responsible for freeing up duplicates * or offpage entries that might be referenced by this pair. * + * Recovery assumes that this may be called without the metadata + * page pinned. + * * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); */ void @@ -1614,15 +1673,16 @@ __ham_dpair(dbp, p, indx) PAGE *p; u_int32_t indx; { - db_indx_t delta, n; + db_indx_t delta, n, *inp; u_int8_t *dest, *src; + inp = P_INP(dbp, p); /* * Compute "delta", the amount we have to shift all of the * offsets. To find the delta, we just need to calculate * the size of the pair of elements we are removing. */ - delta = H_PAIRSIZE(p, dbp->pgsize, indx); + delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx); /* * The hard case: we want to remove something other than @@ -1641,7 +1701,7 @@ __ham_dpair(dbp, p, indx) * be an overlapping copy, so we have to use memmove. */ dest = src + delta; - memmove(dest, src, p->inp[H_DATAINDEX(indx)] - HOFFSET(p)); + memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p)); } /* Adjust page metadata. */ @@ -1650,6 +1710,153 @@ __ham_dpair(dbp, p, indx) /* Adjust the offsets. */ for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++) - p->inp[n] = p->inp[n + 2] + delta; + inp[n] = inp[n + 2] + delta; + +} + +/* + * __ham_c_delpg -- + * + * Adjust the cursors after we've emptied a page in a bucket, taking + * care that when we move cursors pointing to deleted items, their + * orders don't collide with the orders of cursors on the page we move + * them to (since after this function is called, cursors with the same + * index on the two pages will be otherwise indistinguishable--they'll + * all have pgno new_pgno). 
There are three cases: + * + * 1) The emptied page is the first page in the bucket. In this + * case, we've copied all the items from the second page into the + * first page, so the first page is new_pgno and the second page is + * old_pgno. new_pgno is empty, but can have deleted cursors + * pointing at indx 0, so we need to be careful of the orders + * there. This is DB_HAM_DELFIRSTPG. + * + * 2) The page is somewhere in the middle of a bucket. Our caller + * can just delete such a page, so it's old_pgno. old_pgno is + * empty, but may have deleted cursors pointing at indx 0, so we + * need to be careful of indx 0 when we move those cursors to + * new_pgno. This is DB_HAM_DELMIDPG. + * + * 3) The page is the last in a bucket. Again the empty page is + * old_pgno, and again it should only have cursors that are deleted + * and at indx == 0. This time, though, there's no next page to + * move them to, so we set them to indx == num_ent on the previous + * page--and indx == num_ent is the index whose cursors we need to + * be careful of. This is DB_HAM_DELLASTPG. + */ +static int +__ham_c_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp) + DBC *dbc; + db_pgno_t old_pgno, new_pgno; + u_int32_t num_ent; + db_ham_mode op; + u_int32_t *orderp; +{ + DB *dbp, *ldbp; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *my_txn; + DBC *cp; + HASH_CURSOR *hcp; + int found, ret; + db_indx_t indx; + u_int32_t order; + + /* Which is the worrisome index? */ + indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0; + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + found = 0; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + /* + * Find the highest order of any cursor our movement + * may collide with. + */ + order = 1; + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + hcp = (HASH_CURSOR *)cp->internal; + if (hcp->pgno == new_pgno) { + if (hcp->indx == indx && + F_ISSET(hcp, H_DELETED) && + hcp->order >= order) + order = hcp->order + 1; + DB_ASSERT(op != DB_HAM_DELFIRSTPG || + hcp->indx == NDX_INVALID || + (hcp->indx == 0 && + F_ISSET(hcp, H_DELETED))); + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + + hcp = (HASH_CURSOR *)cp->internal; + + if (hcp->pgno == old_pgno) { + switch (op) { + case DB_HAM_DELFIRSTPG: + /* + * We're moving all items, + * regardless of index. + */ + hcp->pgno = new_pgno; + + /* + * But we have to be careful of + * the order values. 
+ */ + if (hcp->indx == indx) + hcp->order += order; + break; + case DB_HAM_DELMIDPG: + hcp->pgno = new_pgno; + DB_ASSERT(hcp->indx == 0 && + F_ISSET(hcp, H_DELETED)); + hcp->order += order; + break; + case DB_HAM_DELLASTPG: + hcp->pgno = new_pgno; + DB_ASSERT(hcp->indx == 0 && + F_ISSET(hcp, H_DELETED)); + hcp->indx = indx; + hcp->order += order; + break; + default: + DB_ASSERT(0); + return (__db_panic(dbenv, EINVAL)); + } + if (my_txn != NULL && cp->txn != my_txn) + found = 1; + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (found != 0 && DBC_LOGGING(dbc)) { + if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op, + old_pgno, new_pgno, indx, order)) != 0) + return (ret); + } + *orderp = order; + return (0); } diff --git a/bdb/hash/hash_rec.c b/bdb/hash/hash_rec.c index ded58c281e9..24d3473c508 100644 --- a/bdb/hash/hash_rec.c +++ b/bdb/hash/hash_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic Exp $"; +static const char revid[] = "$Id: hash_rec.c,v 11.69 2002/09/03 14:12:49 margo Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,15 +53,12 @@ static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "hash.h" -#include "lock.h" -#include "log.h" -#include "mp.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/log.h" -static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *)); +static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *, DB_LSN *)); /* * __ham_insdel_recover -- @@ -82,16 +79,16 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info) DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - u_int32_t opcode; - int cmp_n, cmp_p, flags, getmeta, ret, type; + u_int32_t flags, opcode; + int cmp_n, cmp_p, ret, type; + pagep = NULL; COMPQUIET(info, NULL); - getmeta = 0; REC_PRINT(__ham_insdel_print); REC_INTRO(__ham_insdel_read, 1); - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -100,15 +97,11 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info) * don't bother creating a page. */ goto done; - } else if ((ret = memp_fget(mpf, &argp->pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - getmeta = 1; - cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); @@ -135,7 +128,7 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info) */ if (opcode != DELPAIR || argp->ndx == (u_int32_t)NUM_ENT(pagep)) { - __ham_putitem(pagep, &argp->key, + __ham_putitem(file_dbp, pagep, &argp->key, DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? 
H_OFFPAGE : H_KEYDATA); @@ -145,31 +138,32 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info) type = H_OFFPAGE; else type = H_KEYDATA; - __ham_putitem(pagep, &argp->data, type); + __ham_putitem(file_dbp, pagep, &argp->data, type); } else - (void)__ham_reputpair(pagep, file_dbp->pgsize, + (void)__ham_reputpair(file_dbp, pagep, argp->ndx, &argp->key, &argp->data); LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; flags = DB_MPOOL_DIRTY; - } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) - || (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { + } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) || + (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { /* Need to undo a put or redo a delete. */ __ham_dpair(file_dbp, pagep, argp->ndx); LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; /* Return the previous LSN. */ done: *lsnp = argp->prev_lsn; ret = 0; -out: if (getmeta) - (void)__ham_release_meta(dbc); +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); REC_CLOSE; } @@ -194,15 +188,16 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info) DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - int cmp_n, cmp_p, flags, getmeta, ret; + u_int32_t flags; + int cmp_n, cmp_p, ret; + pagep = NULL; COMPQUIET(info, NULL); - getmeta = 0; REC_PRINT(__ham_newpage_print); REC_INTRO(__ham_newpage_read, 1); - if ((ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->new_pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -212,15 +207,11 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info) */ ret = 0; goto ppage; - } else if ((ret = memp_fget(mpf, &argp->new_pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->new_pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - getmeta = 1; - /* * There are potentially three pages we need to check: the one * that we created/deleted, the one before it and the one after @@ -250,12 +241,13 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info) if (flags) LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; /* Now do the prev page. */ ppage: if (argp->prev_pgno != PGNO_INVALID) { - if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->prev_pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. @@ -265,9 +257,8 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) { */ ret = 0; goto npage; - } else if ((ret = - memp_fget(mpf, &argp->prev_pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->prev_pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } @@ -281,7 +272,8 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) { /* Redo a create new page or undo a delete new page. */ pagep->next_pgno = argp->new_pgno; flags = DB_MPOOL_DIRTY; - } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + } else if ((cmp_p == 0 && + DB_REDO(op) && argp->opcode == DELOVFL) || (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { /* Redo a delete or undo a create new page. */ pagep->next_pgno = argp->next_pgno; @@ -291,13 +283,14 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) { if (flags) LSN(pagep) = DB_REDO(op) ? 
*lsnp : argp->prevlsn; - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; } /* Now time to do the next page */ npage: if (argp->next_pgno != PGNO_INVALID) { - if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. @@ -306,9 +299,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) { * this case, don't bother creating a page. */ goto done; - } else if ((ret = - memp_fget(mpf, &argp->next_pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } @@ -322,7 +314,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) { /* Redo a create new page or undo a delete new page. */ pagep->prev_pgno = argp->new_pgno; flags = DB_MPOOL_DIRTY; - } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || + } else if ((cmp_p == 0 && + DB_REDO(op) && argp->opcode == DELOVFL) || (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { /* Redo a delete or undo a create new page. */ pagep->prev_pgno = argp->prev_pgno; @@ -332,14 +325,15 @@ npage: if (argp->next_pgno != PGNO_INVALID) { if (flags) LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; } done: *lsnp = argp->prev_lsn; ret = 0; -out: if (getmeta) - (void)__ham_release_meta(dbc); +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); REC_CLOSE; } @@ -366,17 +360,18 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info) DB_MPOOLFILE *mpf; DBT dbt; PAGE *pagep; + u_int32_t flags; int32_t grow; - int cmp_n, cmp_p, flags, getmeta, ret; + int cmp_n, cmp_p, ret; u_int8_t *hk; + pagep = NULL; COMPQUIET(info, NULL); - getmeta = 0; REC_PRINT(__ham_replace_print); REC_INTRO(__ham_replace_read, 1); - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -385,15 +380,11 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info) * don't bother creating a page. 
*/ goto done; - } else if ((ret = memp_fget(mpf, &argp->pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - getmeta = 1; - cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); @@ -419,10 +410,10 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info) } if (flags) { - __ham_onpage_replace(pagep, - file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt); + __ham_onpage_replace(file_dbp, pagep, + argp->ndx, argp->off, grow, &dbt); if (argp->makedup) { - hk = P_ENTRY(pagep, argp->ndx); + hk = P_ENTRY(file_dbp, pagep, argp->ndx); if (DB_REDO(op)) HPAGE_PTYPE(hk) = H_DUPLICATE; else @@ -430,14 +421,15 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info) } } - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: if (getmeta) - (void)__ham_release_meta(dbc); +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); REC_CLOSE; } @@ -460,15 +452,16 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - int cmp_n, cmp_p, flags, getmeta, ret; + u_int32_t flags; + int cmp_n, cmp_p, ret; + pagep = NULL; COMPQUIET(info, NULL); - getmeta = 0; REC_PRINT(__ham_splitdata_print); REC_INTRO(__ham_splitdata_read, 1); - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -477,15 +470,11 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) * don't bother creating a page. */ goto done; - } else if ((ret = memp_fget(mpf, &argp->pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - getmeta = 1; - cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); @@ -519,14 +508,15 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) LSN(pagep) = argp->pagelsn; flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: if (getmeta) - (void)__ham_release_meta(dbc); +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); REC_CLOSE; } @@ -550,21 +540,19 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info) DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - int cmp_n, cmp_p, flags, getmeta, ret; + u_int32_t flags; + int cmp_n, cmp_p, ret; + pagep = NULL; COMPQUIET(info, NULL); - getmeta = 0; REC_PRINT(__ham_copypage_print); REC_INTRO(__ham_copypage_read, 1); - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - getmeta = 1; flags = 0; /* This is the bucket page. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. 
That @@ -574,8 +562,8 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info) */ ret = 0; goto donext; - } else if ((ret = memp_fget(mpf, &argp->pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } @@ -597,11 +585,12 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info) LSN(pagep) = argp->pagelsn; flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; donext: /* Now fix up the "next" page. */ - if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -611,8 +600,8 @@ donext: /* Now fix up the "next" page. */ */ ret = 0; goto do_nn; - } else if ((ret = memp_fget(mpf, &argp->next_pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } @@ -629,14 +618,15 @@ donext: /* Now fix up the "next" page. */ memcpy(pagep, argp->page.data, argp->page.size); flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; /* Now fix up the next's next page. */ do_nn: if (argp->nnext_pgno == PGNO_INVALID) goto done; - if ((ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) { /* * We are undoing and the page doesn't exist. That @@ -645,8 +635,8 @@ do_nn: if (argp->nnext_pgno == PGNO_INVALID) * don't bother creating a page. */ goto done; - } else if ((ret = memp_fget(mpf, &argp->nnext_pgno, - DB_MPOOL_CREATE, &pagep)) != 0) + } else if ((ret = mpf->get(mpf, + &argp->nnext_pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; } @@ -666,14 +656,15 @@ do_nn: if (argp->nnext_pgno == PGNO_INVALID) LSN(pagep) = argp->nnextlsn; flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: if (getmeta) - (void)__ham_release_meta(dbc); +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); REC_CLOSE; } @@ -695,13 +686,17 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) __ham_metagroup_args *argp; HASH_CURSOR *hcp; DB *file_dbp; + DBMETA *mmeta; DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - db_pgno_t last_pgno; - int cmp_n, cmp_p, flags, groupgrow, ret; + db_pgno_t pgno; + u_int32_t flags, mmeta_flags; + int cmp_n, cmp_p, did_recover, groupgrow, ret; COMPQUIET(info, NULL); + mmeta_flags = 0; + mmeta = NULL; REC_PRINT(__ham_metagroup_print); REC_INTRO(__ham_metagroup_read, 1); @@ -709,22 +704,24 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) * This logs the virtual create of pages pgno to pgno + bucket * Since the mpool page-allocation is not really able to be * transaction protected, we can never undo it. Even in an abort, - * we have to allocate these pages to the hash table. + * we have to allocate these pages to the hash table if they + * were actually created. In particular, during disaster + * recovery the metapage may be before this point if we + * are rolling backward. If the file has not been extended + * then the metapage could not have been updated. * The log record contains: * bucket: new bucket being allocated. * pgno: page number of the new bucket. 
* if bucket is a power of 2, then we allocated a whole batch of * pages; if it's not, then we simply allocated one new page. */ - groupgrow = - (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; + groupgrow = (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == + argp->bucket + 1; + pgno = argp->pgno; + if (argp->newalloc) + pgno += argp->bucket; - last_pgno = argp->pgno; - if (groupgrow) - /* Read the last page. */ - last_pgno += argp->bucket; - - if ((ret = memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0) + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; cmp_n = log_compare(lsnp, &LSN(pagep)); @@ -743,7 +740,7 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) pagep->lsn = argp->pagelsn; flags = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(mpf, pagep, flags)) != 0) + if ((ret = mpf->put(mpf, pagep, flags)) != 0) goto out; /* Now we have to update the meta-data page. */ @@ -753,39 +750,90 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn); cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn); CHECK_LSN(op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); - if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) { - if (DB_REDO(op)) { - /* Redo the actual updating of bucket counts. */ - ++hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = - (argp->bucket + 1) | hcp->hdr->low_mask; - } - hcp->hdr->dbmeta.lsn = *lsnp; - } else { - /* Undo the actual updating of bucket counts. */ - --hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->high_mask = hcp->hdr->low_mask; - hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; - } - hcp->hdr->dbmeta.lsn = argp->metalsn; + did_recover = 0; + if (cmp_p == 0 && DB_REDO(op)) { + /* Redo the actual updating of bucket counts. */ + ++hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = + (argp->bucket + 1) | hcp->hdr->low_mask; } - if (groupgrow && - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == - PGNO_INVALID) - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = - argp->pgno - argp->bucket - 1; - F_SET(hcp, H_DIRTY); + hcp->hdr->dbmeta.lsn = *lsnp; + did_recover = 1; + } else if (cmp_n == 0 && DB_UNDO(op)) { + /* Undo the actual updating of bucket counts. */ + --hcp->hdr->max_bucket; + if (groupgrow) { + hcp->hdr->high_mask = hcp->hdr->low_mask; + hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; + } + hcp->hdr->dbmeta.lsn = argp->metalsn; + did_recover = 1; + } + + /* + * Now we need to fix up the spares array. Each entry in the + * spares array indicates the beginning page number for the + * indicated doubling. We need to fill this in whenever the + * spares array is invalid, since we never reclaim pages from + * the spares array and we have to allocate the pages to the + * spares array in both the redo and undo cases. + */ + if (argp->newalloc && + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) { + hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = + argp->pgno - argp->bucket - 1; + did_recover = 1; + } + + /* + * Finally, we need to potentially fix up the last_pgno field + * in the master meta-data page (which may or may not be the + * same as the hash header page). 
+ */ + if (argp->mmpgno != argp->mpgno) { + if ((ret = + mpf->get(mpf, &argp->mmpgno, 0, (PAGE **)&mmeta)) != 0) + goto out; + mmeta_flags = 0; + cmp_n = log_compare(lsnp, &mmeta->lsn); + cmp_p = log_compare(&mmeta->lsn, &argp->mmetalsn); + if (cmp_p == 0 && DB_REDO(op)) { + mmeta->lsn = *lsnp; + mmeta_flags = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && DB_UNDO(op)) { + mmeta->lsn = argp->mmetalsn; + mmeta_flags = DB_MPOOL_DIRTY; + } + } else + mmeta = (DBMETA *)hcp->hdr; + + if (argp->newalloc) { + if (mmeta->last_pgno < pgno) + mmeta->last_pgno = pgno; + mmeta_flags = DB_MPOOL_DIRTY; } - if ((ret = __ham_release_meta(dbc)) != 0) + + if (argp->mmpgno != argp->mpgno && + (ret = mpf->put(mpf, mmeta, mmeta_flags)) != 0) goto out; + mmeta = NULL; + + if (did_recover) + F_SET(hcp, H_DIRTY); done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; +out: if (mmeta != NULL) + (void)mpf->put(mpf, mmeta, 0); + if (dbc != NULL) + (void)__ham_release_meta(dbc); + if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) + ret = 0; + + REC_CLOSE; } /* @@ -808,17 +856,20 @@ __ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info) DB_MPOOLFILE *mpf; DB *file_dbp; DBC *dbc; + PAGE *pagep; db_pgno_t pgno; - int cmp_n, cmp_p, flags, ret; + int cmp_n, cmp_p, modified, ret; + mmeta = NULL; + modified = 0; REC_PRINT(__ham_groupalloc_print); REC_INTRO(__ham_groupalloc_read, 0); pgno = PGNO_BASE_MD; - if ((ret = memp_fget(mpf, &pgno, 0, &mmeta)) != 0) { + if ((ret = mpf->get(mpf, &pgno, 0, &mmeta)) != 0) { if (DB_REDO(op)) { /* Page should have existed. */ - (void)__db_pgerr(file_dbp, pgno); + __db_pgerr(file_dbp, pgno, ret); goto out; } else { ret = 0; @@ -839,37 +890,48 @@ __ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info) * that the pages were never allocated, so we'd better check for * that and handle it here. */ - - flags = 0; if (DB_REDO(op)) { - if ((ret = __ham_alloc_pages(file_dbp, argp)) != 0) - goto out1; + if ((ret = __ham_alloc_pages(file_dbp, argp, lsnp)) != 0) + goto out; if (cmp_p == 0) { LSN(mmeta) = *lsnp; - flags = DB_MPOOL_DIRTY; + modified = 1; } - } + } else if (DB_UNDO(op)) { + /* + * Reset the last page back to its preallocation state. + */ + pgno = argp->start_pgno + argp->num - 1; + if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) { - /* - * Always put the pages into the limbo list and free them later. - */ - else if (DB_UNDO(op)) { + if (log_compare(&pagep->lsn, lsnp) == 0) + ZERO_LSN(pagep->lsn); + + if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0) + goto out; + } else if (ret != DB_PAGE_NOTFOUND) + goto out; + /* + * Always put the pages into the limbo list and free them later. + */ if ((ret = __db_add_limbo(dbenv, info, argp->fileid, argp->start_pgno, argp->num)) != 0) goto out; if (cmp_n == 0) { LSN(mmeta) = argp->meta_lsn; - flags = DB_MPOOL_DIRTY; + modified = 1; } } -out1: if ((ret = memp_fput(mpf, mmeta, flags)) != 0) - goto out; - done: if (ret == 0) *lsnp = argp->prev_lsn; -out: REC_CLOSE; +out: if (mmeta != NULL) + (void)mpf->put(mpf, mmeta, modified ? DB_MPOOL_DIRTY : 0); + + if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) + ret = 0; + REC_CLOSE; } /* @@ -883,9 +945,10 @@ out: REC_CLOSE; * Hash normally has holes in its files and handles them appropriately. 
*/ static int -__ham_alloc_pages(dbp, argp) +__ham_alloc_pages(dbp, argp, lsnp) DB *dbp; __ham_groupalloc_args *argp; + DB_LSN *lsnp; { DB_MPOOLFILE *mpf; PAGE *pagep; @@ -898,38 +961,26 @@ __ham_alloc_pages(dbp, argp) pgno = argp->start_pgno + argp->num - 1; /* If the page exists, and it has been initialized, then we're done. */ - if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) == 0) { - if ((pagep->type == P_INVALID) && IS_ZERO_LSN(pagep->lsn)) + if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) { + if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) goto reinit_page; - if ((ret = memp_fput(mpf, pagep, 0)) != 0) + if ((ret = mpf->put(mpf, pagep, 0)) != 0) return (ret); return (0); } - /* - * Had to create the page. On some systems (read "Windows"), - * you can find random garbage on pages to which you haven't - * yet written. So, we have an os layer that will do the - * right thing for group allocations. We call that directly - * to make sure all the pages are allocated and then continue - * merrily on our way with normal recovery. - */ - if ((ret = __os_fpinit(dbp->dbenv, &mpf->fh, - argp->start_pgno, argp->num, dbp->pgsize)) != 0) - return (ret); - - if ((ret = memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) { - (void)__db_pgerr(dbp, pgno); + /* Had to create the page. */ + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) { + __db_pgerr(dbp, pgno, ret); return (ret); } reinit_page: /* Initialize the newly allocated page. */ - P_INIT(pagep, - dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - ZERO_LSN(pagep->lsn); + P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pagep->lsn = *lsnp; - if ((ret = memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0) return (ret); return (0); @@ -942,7 +993,6 @@ reinit_page: * PUBLIC: int __ham_curadj_recover * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); */ - int __ham_curadj_recover(dbenv, dbtp, lsnp, op, info) DB_ENV *dbenv; @@ -958,14 +1008,13 @@ __ham_curadj_recover(dbenv, dbtp, lsnp, op, info) int ret; HASH_CURSOR *hcp; - REC_PRINT(__ham_groupalloc_print); + COMPQUIET(info, NULL); + REC_PRINT(__ham_curadj_print); + REC_INTRO(__ham_curadj_read, 0); - ret = 0; if (op != DB_TXN_ABORT) goto done; - REC_INTRO(__ham_curadj_read, 0); - COMPQUIET(info, NULL); /* * Undo the adjustment by reinitializing the the cursor * to look like the one that was used to do the adustment, @@ -991,7 +1040,6 @@ out: REC_CLOSE; * PUBLIC: int __ham_chgpg_recover * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); */ - int __ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) DB_ENV *dbenv; @@ -1008,15 +1056,18 @@ __ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) int ret; DBC *cp; HASH_CURSOR *lcp; + u_int32_t order, indx; + COMPQUIET(info, NULL); REC_PRINT(__ham_chgpg_print); + REC_INTRO(__ham_chgpg_read, 0); - ret = 0; if (op != DB_TXN_ABORT) - goto out; - REC_INTRO(__ham_chgpg_read, 0); + goto done; - COMPQUIET(info, NULL); + /* Overloaded fields for DB_HAM_DEL*PG */ + indx = argp->old_indx; + order = argp->new_indx; MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid); @@ -1029,50 +1080,77 @@ __ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) lcp = (HASH_CURSOR *)cp->internal; switch (argp->mode) { - case DB_HAM_CHGPG: + case DB_HAM_DELFIRSTPG: if (lcp->pgno != argp->new_pgno) break; - - if (argp->old_indx == NDX_INVALID) + if (lcp->indx != indx || + !F_ISSET(lcp, H_DELETED) || + lcp->order >= order) { 
lcp->pgno = argp->old_pgno; - else if (lcp->indx == argp->new_indx) { - lcp->indx = argp->old_indx; + if (lcp->indx == indx) + lcp->order -= order; + } + break; + case DB_HAM_DELMIDPG: + case DB_HAM_DELLASTPG: + if (lcp->pgno == argp->new_pgno && + lcp->indx == indx && + F_ISSET(lcp, H_DELETED) && + lcp->order >= order) { lcp->pgno = argp->old_pgno; + lcp->order -= order; + lcp->indx = 0; } break; - + case DB_HAM_CHGPG: + /* + * If we're doing a CHGPG, we're undoing + * the move of a non-deleted item to a + * new page. Any cursors with the deleted + * flag set do not belong to this item; + * don't touch them. + */ + if (F_ISSET(lcp, H_DELETED)) + break; + /* FALLTHROUGH */ case DB_HAM_SPLIT: - if (lcp->pgno == argp->new_pgno - && lcp->indx == argp->new_indx) { + if (lcp->pgno == argp->new_pgno && + lcp->indx == argp->new_indx) { lcp->indx = argp->old_indx; lcp->pgno = argp->old_pgno; } break; - case DB_HAM_DUP: - if (lcp->opd != NULL) { - opdcp = - (BTREE_CURSOR *)lcp->opd->internal; - if (opdcp->pgno == argp->new_pgno && - opdcp->indx == argp->new_indx) { - if (F_ISSET(opdcp, C_DELETED)) - F_SET(lcp, H_DELETED); - if ((ret = - lcp->opd->c_close( - lcp->opd)) != 0) - goto out; - lcp->opd = NULL; - } - } + if (lcp->opd == NULL) + break; + opdcp = (BTREE_CURSOR *)lcp->opd->internal; + if (opdcp->pgno != argp->new_pgno || + opdcp->indx != argp->new_indx) + break; + + if (F_ISSET(opdcp, C_DELETED)) + F_SET(lcp, H_DELETED); + /* + * We can't close a cursor while we have the + * dbp mutex locked, since c_close reacquires + * it. It should be safe to drop the mutex + * here, though, since newly opened cursors + * are put only at the end of the tailq and + * the cursor we're adjusting can't be closed + * under us. + */ + MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp); + if ((ret = lcp->opd->c_close(lcp->opd)) != 0) + goto out; + MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp); + lcp->opd = NULL; break; } } - MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp); } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); done: *lsnp = argp->prev_lsn; - ret = 0; out: REC_CLOSE; } diff --git a/bdb/hash/hash_reclaim.c b/bdb/hash/hash_reclaim.c index 8857c5406a4..ac90ffff08a 100644 --- a/bdb/hash/hash_reclaim.c +++ b/bdb/hash/hash_reclaim.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_reclaim.c,v 11.4 2000/11/30 00:58:37 ubell Exp $"; +static const char revid[] = "$Id: hash_reclaim.c,v 11.12 2002/03/28 19:49:43 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,10 +18,8 @@ static const char revid[] = "$Id: hash_reclaim.c,v 11.4 2000/11/30 00:58:37 ubel #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "hash.h" -#include "lock.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" /* * __ham_reclaim -- @@ -52,8 +50,8 @@ __ham_reclaim(dbp, txn) if ((ret = __ham_get_meta(dbc)) != 0) goto err; - if ((ret = __ham_traverse(dbp, - dbc, DB_LOCK_WRITE, __db_reclaim_callback, dbc)) != 0) + if ((ret = __ham_traverse(dbc, + DB_LOCK_WRITE, __db_reclaim_callback, dbc, 1)) != 0) goto err; if ((ret = dbc->c_close(dbc)) != 0) goto err; @@ -66,3 +64,48 @@ err: if (hcp->hdr != NULL) (void)dbc->c_close(dbc); return (ret); } + +/* + * __ham_truncate -- + * Reclaim the pages from a subdatabase and return them to the + * parent free list. 
+ * + * PUBLIC: int __ham_truncate __P((DB *, DB_TXN *txn, u_int32_t *)); + */ +int +__ham_truncate(dbp, txn, countp) + DB *dbp; + DB_TXN *txn; + u_int32_t *countp; +{ + DBC *dbc; + HASH_CURSOR *hcp; + db_trunc_param trunc; + int ret; + + /* Open up a cursor that we'll use for traversing. */ + if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err; + + trunc.count = 0; + trunc.dbc = dbc; + + if ((ret = __ham_traverse(dbc, + DB_LOCK_WRITE, __db_truncate_callback, &trunc, 1)) != 0) + goto err; + if ((ret = __ham_release_meta(dbc)) != 0) + goto err; + if ((ret = dbc->c_close(dbc)) != 0) + goto err; + *countp = trunc.count; + return (0); + +err: if (hcp->hdr != NULL) + (void)__ham_release_meta(dbc); + (void)dbc->c_close(dbc); + return (ret); +} diff --git a/bdb/hash/hash_stat.c b/bdb/hash/hash_stat.c index ed64bbc68bd..f9ee1d099cb 100644 --- a/bdb/hash/hash_stat.c +++ b/bdb/hash/hash_stat.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_stat.c,v 11.24 2000/12/21 21:54:35 margo Exp $"; +static const char revid[] = "$Id: hash_stat.c,v 11.48 2002/08/06 06:11:28 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,11 +18,9 @@ static const char revid[] = "$Id: hash_stat.c,v 11.24 2000/12/21 21:54:35 margo #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "hash.h" -#include "lock.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" static int __ham_stat_callback __P((DB *, PAGE *, void *, int *)); @@ -30,24 +28,29 @@ static int __ham_stat_callback __P((DB *, PAGE *, void *, int *)); * __ham_stat -- * Gather/print the hash statistics * - * PUBLIC: int __ham_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); + * PUBLIC: int __ham_stat __P((DB *, void *, u_int32_t)); */ int -__ham_stat(dbp, spp, db_malloc, flags) +__ham_stat(dbp, spp, flags) DB *dbp; - void *spp, *(*db_malloc) __P((size_t)); + void *spp; u_int32_t flags; { + DBC *dbc; + DB_ENV *dbenv; DB_HASH_STAT *sp; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; - DBC *dbc; PAGE *h; db_pgno_t pgno; int ret; - PANIC_CHECK(dbp->dbenv); + dbenv = dbp->dbenv; + + PANIC_CHECK(dbenv); DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); + mpf = dbp->mpf; sp = NULL; /* Check for invalid flags. */ @@ -62,39 +65,39 @@ __ham_stat(dbp, spp, db_malloc, flags) goto err; /* Allocate and clear the structure. */ - if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0) + if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0) goto err; memset(sp, 0, sizeof(*sp)); - if (flags == DB_CACHED_COUNTS) { - sp->hash_nkeys = hcp->hdr->dbmeta.key_count; - sp->hash_ndata = hcp->hdr->dbmeta.record_count; - goto done; - } - /* Copy the fields that we have. */ + sp->hash_nkeys = hcp->hdr->dbmeta.key_count; + sp->hash_ndata = hcp->hdr->dbmeta.record_count; sp->hash_pagesize = dbp->pgsize; sp->hash_buckets = hcp->hdr->max_bucket + 1; sp->hash_magic = hcp->hdr->dbmeta.magic; sp->hash_version = hcp->hdr->dbmeta.version; sp->hash_metaflags = hcp->hdr->dbmeta.flags; - sp->hash_nelem = hcp->hdr->nelem; sp->hash_ffactor = hcp->hdr->ffactor; + if (flags == DB_FAST_STAT || flags == DB_CACHED_COUNTS) + goto done; + /* Walk the free list, counting pages. 
*/ for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; pgno != PGNO_INVALID;) { ++sp->hash_free; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; pgno = h->next_pgno; - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); } /* Now traverse the rest of the table. */ - if ((ret = __ham_traverse(dbp, - dbc, DB_LOCK_READ, __ham_stat_callback, sp)) != 0) + sp->hash_nkeys = 0; + sp->hash_ndata = 0; + if ((ret = __ham_traverse(dbc, + DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0) goto err; if (!F_ISSET(dbp, DB_AM_RDONLY)) { @@ -114,7 +117,7 @@ done: return (0); err: if (sp != NULL) - __os_free(sp, sizeof(*sp)); + __os_ufree(dbenv, sp); if (hcp->hdr != NULL) (void)__ham_release_meta(dbc); (void)dbc->c_close(dbc); @@ -127,26 +130,30 @@ err: if (sp != NULL) * Traverse an entire hash table. We use the callback so that we * can use this both for stat collection and for deallocation. * - * PUBLIC: int __ham_traverse __P((DB *, DBC *, db_lockmode_t, - * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *)); + * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t, + * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *, int)); */ int -__ham_traverse(dbp, dbc, mode, callback, cookie) - DB *dbp; +__ham_traverse(dbc, mode, callback, cookie, look_past_max) DBC *dbc; db_lockmode_t mode; int (*callback) __P((DB *, PAGE *, void *, int *)); void *cookie; + int look_past_max; { + DB *dbp; + DBC *opd; + DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; HKEYDATA *hk; - DBC *opd; db_pgno_t pgno, opgno; - u_int32_t bucket; int did_put, i, ret, t_ret; + u_int32_t bucket, spares_entry; - hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; opd = NULL; + mpf = dbp->mpf; + hcp = (HASH_CURSOR *)dbc->internal; ret = 0; /* @@ -156,12 +163,47 @@ __ham_traverse(dbp, dbc, mode, callback, cookie) * locking easy, makes this a pain in the butt. We have to traverse * duplicate, overflow and big pages from the bucket so that we * don't access anything that isn't properly locked. + * */ - for (bucket = 0; bucket <= hcp->hdr->max_bucket; bucket++) { + for (bucket = 0;; bucket++) { + /* + * We put the loop exit condition check here, because + * it made for a really vile extended ?: that made SCO's + * compiler drop core. + * + * If look_past_max is not set, we can stop at max_bucket; + * if it is set, we need to include pages that are part of + * the current doubling but beyond the highest bucket we've + * split into, as well as pages from a "future" doubling + * that may have been created within an aborted + * transaction. To do this, keep looping (and incrementing + * bucket) until the corresponding spares array entries + * cease to be defined. + */ + if (look_past_max) { + spares_entry = __db_log2(bucket + 1); + if (spares_entry >= NCACHED || + hcp->hdr->spares[spares_entry] == 0) + break; + } else { + if (bucket > hcp->hdr->max_bucket) + break; + } + hcp->bucket = bucket; hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); for (ret = __ham_get_cpage(dbc, mode); ret == 0; ret = __ham_next_cpage(dbc, pgno, 0)) { + + /* + * If we are cleaning up pages past the max_bucket, + * then they may be on the free list and have their + * next pointers set, but the should be ignored. In + * fact, we really ought to just skip anybody who is + * not a valid page. + */ + if (TYPE(hcp->page) == P_INVALID) + break; pgno = NEXT_PGNO(hcp->page); /* @@ -171,17 +213,17 @@ __ham_traverse(dbp, dbc, mode, callback, cookie) * case we have to count those pages). 
*/ for (i = 0; i < NUM_ENT(hcp->page); i++) { - hk = (HKEYDATA *)P_ENTRY(hcp->page, i); + hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i); switch (HPAGE_PTYPE(hk)) { case H_OFFDUP: memcpy(&opgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); if ((ret = __db_c_newopd(dbc, - opgno, &opd)) != 0) + opgno, NULL, &opd)) != 0) return (ret); if ((ret = __bam_traverse(opd, DB_LOCK_READ, opgno, - __ham_stat_callback, cookie)) + callback, cookie)) != 0) goto err; if ((ret = opd->c_close(opd)) != 0) @@ -221,10 +263,10 @@ __ham_traverse(dbp, dbc, mode, callback, cookie) goto err; if (STD_LOCKING(dbc)) - (void)lock_put(dbp->dbenv, &hcp->lock); + (void)dbp->dbenv->lock_put(dbp->dbenv, &hcp->lock); if (hcp->page != NULL) { - if ((ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0) + if ((ret = mpf->put(mpf, hcp->page, 0)) != 0) return (ret); hcp->page = NULL; } @@ -247,6 +289,7 @@ __ham_stat_callback(dbp, pagep, cookie, putp) DB_BTREE_STAT bstat; db_indx_t indx, len, off, tlen, top; u_int8_t *hk; + int ret; *putp = 0; sp = cookie; @@ -266,15 +309,15 @@ __ham_stat_callback(dbp, pagep, cookie, putp) * is a bucket. */ if (PREV_PGNO(pagep) == PGNO_INVALID) - sp->hash_bfree += P_FREESPACE(pagep); + sp->hash_bfree += P_FREESPACE(dbp, pagep); else { sp->hash_overflows++; - sp->hash_ovfl_free += P_FREESPACE(pagep); + sp->hash_ovfl_free += P_FREESPACE(dbp, pagep); } top = NUM_ENT(pagep); /* Correct for on-page duplicates and deleted items. */ for (indx = 0; indx < top; indx += P_INDX) { - switch (*H_PAIRDATA(pagep, indx)) { + switch (*H_PAIRDATA(dbp, pagep, indx)) { case H_OFFDUP: case H_OFFPAGE: break; @@ -282,8 +325,8 @@ __ham_stat_callback(dbp, pagep, cookie, putp) sp->hash_ndata++; break; case H_DUPLICATE: - tlen = LEN_HDATA(pagep, 0, indx); - hk = H_PAIRDATA(pagep, indx); + tlen = LEN_HDATA(dbp, pagep, 0, indx); + hk = H_PAIRDATA(dbp, pagep, indx); for (off = 0; off < tlen; off += len + 2 * sizeof (db_indx_t)) { sp->hash_ndata++; @@ -310,7 +353,8 @@ __ham_stat_callback(dbp, pagep, cookie, putp) bstat.bt_int_pgfree = 0; bstat.bt_leaf_pgfree = 0; bstat.bt_ndata = 0; - __bam_stat_callback(dbp, pagep, &bstat, putp); + if ((ret = __bam_stat_callback(dbp, pagep, &bstat, putp)) != 0) + return (ret); sp->hash_dup++; sp->hash_dup_free += bstat.bt_leaf_pgfree + bstat.bt_dup_pgfree + bstat.bt_int_pgfree; @@ -318,11 +362,10 @@ __ham_stat_callback(dbp, pagep, cookie, putp) break; case P_OVERFLOW: sp->hash_bigpages++; - sp->hash_big_bfree += P_OVFLSPACE(dbp->pgsize, pagep); + sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep); break; default: - return (__db_unknown_type(dbp->dbenv, - "__ham_stat_callback", pagep->type)); + return (__db_pgfmt(dbp->dbenv, pagep->pgno)); } return (0); diff --git a/bdb/hash/hash_upgrade.c b/bdb/hash/hash_upgrade.c index c34381276b4..2dd21d7b644 100644 --- a/bdb/hash/hash_upgrade.c +++ b/bdb/hash/hash_upgrade.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_upgrade.c,v 11.25 2000/12/14 19:18:32 bostic Exp $"; +static const char revid[] = "$Id: hash_upgrade.c,v 11.32 2002/08/06 05:34:58 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,14 +18,13 @@ static const char revid[] = "$Id: hash_upgrade.c,v 11.25 2000/12/14 19:18:32 bos #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "hash.h" -#include "db_upgrade.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/db_upgrade.h" /* * __ham_30_hashmeta -- - * Upgrade the database from version 4/5 to version 6. + * Upgrade the database from version 4/5 to version 6. * * PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); */ @@ -163,10 +162,6 @@ __ham_30_sizefix(dbp, fhp, realname, metabuf) return (ret); if ((ret = __os_write(dbenv, fhp, buf, pagesize, &nw)) != 0) return (ret); - if (nw != pagesize) { - __db_err(dbenv, "Short write during upgrade"); - return (EIO); - } } return (0); @@ -174,7 +169,7 @@ __ham_30_sizefix(dbp, fhp, realname, metabuf) /* * __ham_31_hashmeta -- - * Upgrade the database from version 6 to version 7. + * Upgrade the database from version 6 to version 7. * * PUBLIC: int __ham_31_hashmeta * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); @@ -229,7 +224,7 @@ __ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) /* * __ham_31_hash -- - * Upgrade the database hash leaf pages. + * Upgrade the database hash leaf pages. * * PUBLIC: int __ham_31_hash * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); @@ -252,7 +247,7 @@ __ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) ret = 0; for (indx = 0; indx < NUM_ENT(h); indx += 2) { - hk = (HKEYDATA *)H_PAIRDATA(h, indx); + hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx); if (HPAGE_PTYPE(hk) == H_OFFDUP) { memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); tpgno = pgno; diff --git a/bdb/hash/hash_verify.c b/bdb/hash/hash_verify.c index 31dd7cc2299..e6f5a2b0d65 100644 --- a/bdb/hash/hash_verify.c +++ b/bdb/hash/hash_verify.c @@ -1,16 +1,16 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2002 * Sleepycat Software. All rights reserved. 
* - * $Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $ + * $Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $ */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $"; +static const char revid[] = "$Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,10 +20,10 @@ static const char revid[] = "$Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "db_verify.h" -#include "btree.h" -#include "hash.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, @@ -83,8 +83,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) if (!LF_ISSET(DB_NOORDERCHK)) if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { EPRINT((dbp->dbenv, -"Database has different custom hash function; reverify with DB_NOORDERCHK set" - )); +"Page %lu: database has different custom hash function; reverify with DB_NOORDERCHK set", + (u_long)pgno)); /* * Return immediately; this is probably a sign * of user error rather than database corruption, so @@ -97,8 +97,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) /* max_bucket must be less than the last pgno. */ if (m->max_bucket > vdp->last_pgno) { EPRINT((dbp->dbenv, - "Impossible max_bucket %lu on meta page %lu", - m->max_bucket, pgno)); + "Page %lu: Impossible max_bucket %lu on meta page", + (u_long)pgno, (u_long)m->max_bucket)); /* * Most other fields depend somehow on max_bucket, so * we just return--there will be lots of extraneous @@ -118,15 +118,15 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); if (m->high_mask != pwr - 1) { EPRINT((dbp->dbenv, - "Incorrect high_mask %lu on page %lu, should be %lu", - m->high_mask, pgno, pwr - 1)); + "Page %lu: incorrect high_mask %lu, should be %lu", + (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); isbad = 1; } pwr >>= 1; if (m->low_mask != pwr - 1) { EPRINT((dbp->dbenv, - "Incorrect low_mask %lu on page %lu, should be %lu", - m->low_mask, pgno, pwr - 1)); + "Page %lu: incorrect low_mask %lu, should be %lu", + (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); isbad = 1; } @@ -140,8 +140,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) */ if (m->nelem > 0x80000000) { EPRINT((dbp->dbenv, - "Suspiciously high nelem of %lu on page %lu", - m->nelem, pgno)); + "Page %lu: suspiciously high nelem of %lu", + (u_long)pgno, (u_long)m->nelem)); isbad = 1; pip->h_nelem = 0; } else @@ -164,13 +164,14 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) mbucket = (1 << i) - 1; if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { EPRINT((dbp->dbenv, - "Spares array entry %lu, page %lu is invalid", - i, pgno)); + "Page %lu: spares array entry %d is invalid", + (u_long)pgno, i)); isbad = 1; } } -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; return ((ret == 0 && isbad == 1) ? 
DB_VERIFY_BAD : ret); } @@ -192,6 +193,7 @@ __ham_vrfy(dbp, vdp, h, pgno, flags) { VRFY_PAGEINFO *pip; u_int32_t ent, himark, inpend; + db_indx_t *inp; int isbad, ret, t_ret; isbad = 0; @@ -226,31 +228,33 @@ __ham_vrfy(dbp, vdp, h, pgno, flags) * In any case, we return immediately if things are bad, as it would * be unsafe to proceed. */ + inp = P_INP(dbp, h); for (ent = 0, himark = dbp->pgsize, - inpend = (u_int8_t *)h->inp - (u_int8_t *)h; + inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); ent < NUM_ENT(h); ent++) - if (h->inp[ent] >= himark) { + if (inp[ent] >= himark) { EPRINT((dbp->dbenv, - "Item %lu on page %lu out of order or nonsensical", - ent, pgno)); + "Page %lu: item %lu is out of order or nonsensical", + (u_long)pgno, (u_long)ent)); isbad = 1; goto err; } else if (inpend >= himark) { EPRINT((dbp->dbenv, - "inp array collided with data on page %lu", - pgno)); + "Page %lu: entries array collided with data", + (u_long)pgno)); isbad = 1; goto err; } else { - himark = h->inp[ent]; + himark = inp[ent]; inpend += sizeof(db_indx_t); if ((ret = __ham_vrfy_item( dbp, vdp, pgno, h, ent, flags)) != 0) goto err; } -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); } @@ -279,7 +283,7 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); - switch (HPAGE_TYPE(h, i)) { + switch (HPAGE_TYPE(dbp, h, i)) { case H_KEYDATA: /* Nothing to do here--everything but the type field is data */ break; @@ -287,8 +291,8 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) /* Are we a datum or a key? Better be the former. */ if (i % 2 == 0) { EPRINT((dbp->dbenv, - "Hash key stored as duplicate at page %lu item %lu", - pip->pgno, i)); + "Page %lu: hash key stored as duplicate item %lu", + (u_long)pip->pgno, (u_long)i)); } /* * Dups are encoded as a series within a single HKEYDATA, @@ -300,16 +304,16 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) * Note that at this point, we've verified item i-1, so * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). */ - len = LEN_HKEYDATA(h, dbp->pgsize, i); - databuf = HKEYDATA_DATA(P_ENTRY(h, i)); + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); + databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); /* Make sure the length is plausible. */ if (offset + DUP_SIZE(dlen) > len) { EPRINT((dbp->dbenv, - "Duplicate item %lu, page %lu has bad length", - i, pip->pgno)); + "Page %lu: duplicate item %lu has bad length", + (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } @@ -323,8 +327,8 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) sizeof(db_indx_t)); if (elen != dlen) { EPRINT((dbp->dbenv, - "Duplicate item %lu, page %lu has two different lengths", - i, pip->pgno)); + "Page %lu: duplicate item %lu has two different lengths", + (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } @@ -336,12 +340,12 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) break; case H_OFFPAGE: /* Offpage item. Make sure pgno is sane, save off. 
*/ - memcpy(&hop, P_ENTRY(h, i), HOFFPAGE_SIZE); + memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || hop.pgno == PGNO_INVALID) { EPRINT((dbp->dbenv, - "Offpage item %lu, page %lu has bad page number", - i, pip->pgno)); + "Page %lu: offpage item %lu has bad pgno %lu", + (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno)); ret = DB_VERIFY_BAD; goto err; } @@ -354,12 +358,12 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) break; case H_OFFDUP: /* Offpage duplicate item. Same drill. */ - memcpy(&hod, P_ENTRY(h, i), HOFFDUP_SIZE); + memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || hod.pgno == PGNO_INVALID) { EPRINT((dbp->dbenv, - "Offpage item %lu, page %lu has bad page number", - i, pip->pgno)); + "Page %lu: offpage item %lu has bad page number", + (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } @@ -372,12 +376,14 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) break; default: EPRINT((dbp->dbenv, - "Item %i, page %lu has bad type", i, pip->pgno)); + "Page %lu: item %i has bad type", + (u_long)pip->pgno, (u_long)i)); ret = DB_VERIFY_BAD; break; } -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; return (ret); } @@ -397,29 +403,32 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags) u_int32_t flags; { DB *pgset; + DB_MPOOLFILE *mpf; HMETA *m; PAGE *h; VRFY_PAGEINFO *pip; int isbad, p, ret, t_ret; db_pgno_t pgno; - u_int32_t bucket; + u_int32_t bucket, spares_entry; - ret = isbad = 0; - h = NULL; + mpf = dbp->mpf; pgset = vdp->pgset; + h = NULL; + ret = isbad = 0; if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0) return (ret); if (p != 0) { EPRINT((dbp->dbenv, - "Hash meta page %lu referenced twice", meta_pgno)); + "Page %lu: Hash meta page referenced twice", + (u_long)meta_pgno)); return (DB_VERIFY_BAD); } if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) return (ret); /* Get the meta page; we'll need it frequently. */ - if ((ret = memp_fget(dbp->mpf, &meta_pgno, 0, &m)) != 0) + if ((ret = mpf->get(mpf, &meta_pgno, 0, &m)) != 0) return (ret); /* Loop through bucket by bucket. */ @@ -445,8 +454,8 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags) * Note that this should be safe, since we've already verified * that the spares array is sane. */ - for (bucket = m->max_bucket + 1; - m->spares[__db_log2(bucket + 1)] != 0; bucket++) { + for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), + spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { pgno = BS_TO_PAGE(bucket, m->spares); if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) goto err; @@ -454,43 +463,51 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags) /* It's okay if these pages are totally zeroed; unmark it. */ F_CLR(pip, VRFY_IS_ALLZEROES); + /* It's also OK if this page is simply invalid. 
*/ + if (pip->type == P_INVALID) { + if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, + vdp, pip)) != 0) + goto err; + continue; + } + if (pip->type != P_HASH) { EPRINT((dbp->dbenv, - "Hash bucket %lu maps to non-hash page %lu", - bucket, pgno)); + "Page %lu: hash bucket %lu maps to non-hash page", + (u_long)pgno, (u_long)bucket)); isbad = 1; } else if (pip->entries != 0) { EPRINT((dbp->dbenv, - "Non-empty page %lu in unused hash bucket %lu", - pgno, bucket)); + "Page %lu: non-empty page in unused hash bucket %lu", + (u_long)pgno, (u_long)bucket)); isbad = 1; } else { if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) goto err; if (p != 0) { EPRINT((dbp->dbenv, - "Hash page %lu above max_bucket referenced", - pgno)); + "Page %lu: above max_bucket referenced", + (u_long)pgno)); isbad = 1; } else { if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0) goto err; - if ((ret = - __db_vrfy_putpageinfo(vdp, pip)) != 0) + if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, + vdp, pip)) != 0) goto err; continue; } } /* If we got here, it's an error. */ - (void)__db_vrfy_putpageinfo(vdp, pip); + (void)__db_vrfy_putpageinfo(dbp->dbenv, vdp, pip); goto err; } -err: if ((t_ret = memp_fput(dbp->mpf, m, 0)) != 0) +err: if ((t_ret = mpf->put(mpf, m, 0)) != 0) return (t_ret); - if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0) + if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0) return (t_ret); return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); } @@ -535,8 +552,9 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) /* Make sure we got a plausible page number. */ if (pgno > vdp->last_pgno || pip->type != P_HASH) { - EPRINT((dbp->dbenv, "Bucket %lu has impossible first page %lu", - bucket, pgno)); + EPRINT((dbp->dbenv, + "Page %lu: impossible first page in bucket %lu", + (u_long)pgno, (u_long)bucket)); /* Unsafe to continue. */ isbad = 1; goto err; @@ -544,7 +562,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) if (pip->prev_pgno != PGNO_INVALID) { EPRINT((dbp->dbenv, - "First hash page %lu in bucket %lu has a prev_pgno", pgno)); + "Page %lu: first page in hash bucket %lu has a prev_pgno", + (u_long)pgno, (u_long)bucket)); isbad = 1; } @@ -564,7 +583,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) goto err; if (p != 0) { EPRINT((dbp->dbenv, - "Hash page %lu referenced twice", pgno)); + "Page %lu: hash page referenced twice", + (u_long)pgno)); isbad = 1; /* Unsafe to continue. */ goto err; @@ -584,11 +604,11 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) F_CLR(pip, VRFY_IS_ALLZEROES); /* If we have dups, our meta page had better know about it. 
*/ - if (F_ISSET(pip, VRFY_HAS_DUPS) - && !F_ISSET(mip, VRFY_HAS_DUPS)) { + if (F_ISSET(pip, VRFY_HAS_DUPS) && + !F_ISSET(mip, VRFY_HAS_DUPS)) { EPRINT((dbp->dbenv, - "Duplicates present in non-duplicate database, page %lu", - pgno)); + "Page %lu: duplicates present in non-duplicate database", + (u_long)pgno)); isbad = 1; } @@ -599,8 +619,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) if (F_ISSET(mip, VRFY_HAS_DUPSORT) && F_ISSET(pip, VRFY_DUPS_UNSORTED)) { EPRINT((dbp->dbenv, - "Unsorted dups in sorted-dup database, page %lu", - pgno)); + "Page %lu: unsorted dups in sorted-dup database", + (u_long)pgno)); isbad = 1; } @@ -625,8 +645,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) } if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno, NULL, NULL, - flags | ST_RECNUM | ST_DUPSET, NULL, - NULL, NULL)) != 0) { + flags | ST_RECNUM | ST_DUPSET | ST_TOPLEVEL, + NULL, NULL, NULL)) != 0) { if (ret == DB_VERIFY_BAD) isbad = 1; else @@ -648,7 +668,7 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) } next_pgno = pip->next_pgno; - ret = __db_vrfy_putpageinfo(vdp, pip); + ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip); pip = NULL; if (ret != 0) @@ -661,7 +681,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) if (!IS_VALID_PGNO(next_pgno)) { DB_ASSERT(0); EPRINT((dbp->dbenv, - "Hash page %lu has bad next_pgno", pgno)); + "Page %lu: hash page has bad next_pgno", + (u_long)pgno)); isbad = 1; goto err; } @@ -670,8 +691,9 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) goto err; if (pip->prev_pgno != pgno) { - EPRINT((dbp->dbenv, "Hash page %lu has bad prev_pgno", - next_pgno)); + EPRINT((dbp->dbenv, + "Page %lu: hash page has bad prev_pgno", + (u_long)next_pgno)); isbad = 1; } pgno = next_pgno; @@ -679,11 +701,11 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags) err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) ret = t_ret; - if (mip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && - ret == 0) + if (mip != NULL && ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0) ret = t_ret; - if (pip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && - ret == 0) + if (pip != NULL && ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0) ret = t_ret; return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } @@ -707,16 +729,19 @@ __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); { DBT dbt; + DB_MPOOLFILE *mpf; PAGE *h; db_indx_t i; int ret, t_ret, isbad; u_int32_t hval, bucket; + mpf = dbp->mpf; ret = isbad = 0; + memset(&dbt, 0, sizeof(DBT)); F_SET(&dbt, DB_DBT_REALLOC); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) return (ret); for (i = 0; i < nentries; i += 2) { @@ -738,15 +763,15 @@ __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) if (bucket != thisbucket) { EPRINT((dbp->dbenv, - "Item %lu on page %lu hashes incorrectly", - i, pgno)); + "Page %lu: item %lu hashes incorrectly", + (u_long)pgno, (u_long)i)); isbad = 1; } } err: if (dbt.data != NULL) - __os_free(dbt.data, 0); - if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0) + __os_ufree(dbp->dbenv, dbt.data); + if ((t_ret = mpf->put(mpf, h, 0)) != 0) return (t_ret); return ((ret == 0 && isbad == 1) ? 
DB_VERIFY_BAD : ret); @@ -782,7 +807,7 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) dbt.flags = DB_DBT_REALLOC; memset(&unkdbt, 0, sizeof(DBT)); - unkdbt.size = strlen("UNKNOWN") + 1; + unkdbt.size = (u_int32_t)strlen("UNKNOWN") + 1; unkdbt.data = "UNKNOWN"; err_ret = 0; @@ -791,7 +816,7 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) * Allocate a buffer for overflow items. Start at one page; * __db_safe_goff will realloc as needed. */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &buf)) != 0) + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &buf)) != 0) return (ret); himark = dbp->pgsize; @@ -808,8 +833,8 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) break; if (ret == 0) { - hk = P_ENTRY(h, i); - len = LEN_HKEYDATA(h, dbp->pgsize, i); + hk = P_ENTRY(dbp, h, i); + len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); if ((u_int32_t)(hk + len - (u_int8_t *)h) > dbp->pgsize) { /* @@ -834,7 +859,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); dbt.size = len; dbt.data = buf; if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, NULL)) != 0) + 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; case H_OFFPAGE: @@ -848,11 +873,11 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); dpgno, &dbt, &buf, flags)) != 0) { err_ret = ret; (void)__db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, NULL); + handle, callback, 0, vdp); break; } if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, NULL)) != 0) + 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; case H_OFFDUP: @@ -865,7 +890,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); /* UNKNOWN iff pgno is bad or we're a key. */ if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { if ((ret = __db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, NULL)) != 0) + handle, callback, 0, vdp)) != 0) err_ret = ret; } else if ((ret = __db_salvage_duptree(dbp, vdp, dpgno, &dbt, handle, callback, @@ -908,7 +933,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); dbt.size = dlen; dbt.data = buf; if ((ret = __db_prdbt(&dbt, 0, " ", - handle, callback, 0, NULL)) != 0) + handle, callback, 0, vdp)) != 0) err_ret = ret; tlen += sizeof(db_indx_t); } @@ -917,7 +942,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); } } - __os_free(buf, 0); + __os_free(dbp->dbenv, buf); if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) return (t_ret); return ((ret == 0 && err_ret != 0) ? err_ret : ret); @@ -938,6 +963,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) u_int32_t flags; DB *pgset; { + DB_MPOOLFILE *mpf; PAGE *h; db_pgno_t pgno; u_int32_t bucket, totpgs; @@ -951,6 +977,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) DB_ASSERT(pgset != NULL); + mpf = dbp->mpf; totpgs = 0; /* @@ -967,7 +994,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) * Safely walk the list of pages in this bucket. */ for (;;) { - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) return (ret); if (TYPE(h) == P_HASH) { @@ -976,24 +1003,26 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) * pgset. */ if (++totpgs > vdp->last_pgno) { - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); return (DB_VERIFY_BAD); } if ((ret = - __db_vrfy_pgset_inc(pgset, pgno)) != 0) + __db_vrfy_pgset_inc(pgset, pgno)) != 0) { + (void)mpf->put(mpf, h, 0); return (ret); + } pgno = NEXT_PGNO(h); } else pgno = PGNO_INVALID; - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) return (ret); /* If the new pgno is wonky, go onto the next bucket. 
*/ if (!IS_VALID_PGNO(pgno) || pgno == PGNO_INVALID) - goto nextbucket; + break; /* * If we've touched this page before, we have a cycle; @@ -1002,9 +1031,8 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0) return (ret); if (val != 0) - goto nextbucket; + break; } -nextbucket: ; } return (0); }
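
Editor's note: the hash recovery functions changed in this patch all follow the same LSN-comparison discipline: compare the page's current LSN against the log record's own LSN (cmp_n) and against the LSN the record saved for the page before the update (cmp_p), redo only when cmp_p matches, undo only when cmp_n matches, and otherwise leave the page alone. The sketch below is a minimal standalone model of that decision under simplified stand-in types; it is not the Berkeley DB API (the real code uses DB_LSN, log_compare(), DB_REDO/DB_UNDO, CHECK_LSN and mpf->put() with DB_MPOOL_DIRTY).

#include <stdio.h>

/* Toy stand-ins for DB_LSN/log_compare; names are illustrative only. */
struct lsn { unsigned int file, offset; };

static int
lsn_cmp(const struct lsn *a, const struct lsn *b)
{
	if (a->file != b->file)
		return (a->file < b->file ? -1 : 1);
	if (a->offset != b->offset)
		return (a->offset < b->offset ? -1 : 1);
	return (0);
}

enum recop { REDO, UNDO };

/*
 * Decide what a recovery routine should do with one page, following the
 * cmp_p/cmp_n pattern in hash_rec.c: redo only if the page still carries
 * the LSN the record saw before the change (cmp_p == 0), undo only if the
 * page carries this record's own LSN (cmp_n == 0), otherwise do nothing.
 */
static const char *
recover_page(struct lsn *page_lsn, const struct lsn *rec_lsn,
    const struct lsn *before_lsn, enum recop op)
{
	int cmp_n, cmp_p;

	cmp_n = lsn_cmp(rec_lsn, page_lsn);	/* this record vs. the page */
	cmp_p = lsn_cmp(page_lsn, before_lsn);	/* the page vs. pre-update LSN */

	if (cmp_p == 0 && op == REDO) {
		*page_lsn = *rec_lsn;		/* roll forward, stamp new LSN */
		return ("redo");
	}
	if (cmp_n == 0 && op == UNDO) {
		*page_lsn = *before_lsn;	/* roll back, restore old LSN */
		return ("undo");
	}
	return ("no-op");			/* page already in target state */
}

int
main(void)
{
	struct lsn before = { 1, 100 }, rec = { 1, 200 };
	struct lsn page = before;		/* page never saw the update */

	printf("redo pass:  %s\n", recover_page(&page, &rec, &before, REDO));
	printf("redo again: %s\n", recover_page(&page, &rec, &before, REDO));
	printf("undo pass:  %s\n", recover_page(&page, &rec, &before, UNDO));
	return (0);
}

Run forward twice and backward once, the model shows why the pattern is idempotent: the second redo pass finds the page already stamped with the record's LSN and becomes a no-op, which is exactly what lets these recovery functions be replayed safely.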
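
Editor's note: several of the changed routines lean on the same bucket arithmetic. The new loop exit in __ham_traverse reads spares[__db_log2(bucket + 1)], the patch's own comment describes each spares entry as the beginning page number for the indicated doubling, and __ham_metagroup_recover's groupgrow test asks whether bucket + 1 is a power of two. The standalone sketch below merely tabulates that arithmetic for small bucket numbers; ceil_log2() is a hand-rolled stand-in assumed to match __db_log2()'s rounded-up log base 2, and the table is illustrative only.

#include <stdio.h>

/*
 * Rounded-up log base 2; assumed equivalent to Berkeley DB's __db_log2()
 * for the purposes of this illustration.
 */
static unsigned int
ceil_log2(unsigned int num)
{
	unsigned int i, limit;

	for (i = 0, limit = 1; limit < num; limit <<= 1)
		++i;
	return (i);
}

int
main(void)
{
	unsigned int bucket, entry, groupgrow;

	/*
	 * For each bucket, print which spares[] entry (doubling) it maps to
	 * -- the __db_log2(bucket + 1) index used by the traversal code --
	 * and whether bucket + 1 is a power of two, the groupgrow test from
	 * __ham_metagroup_recover.
	 */
	printf("bucket  spares-entry  groupgrow\n");
	for (bucket = 0; bucket <= 16; bucket++) {
		entry = ceil_log2(bucket + 1);
		groupgrow = (1u << entry) == bucket + 1;
		printf("%6u  %12u  %9s\n",
		    bucket, entry, groupgrow ? "yes" : "no");
	}
	return (0);
}

The output makes the doubling structure visible: bucket 0 sits alone in entry 0, bucket 1 in entry 1, buckets 2-3 in entry 2, 4-7 in entry 3, and so on, which is why an unset spares entry is a safe stopping point when the traversal looks past max_bucket.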