summaryrefslogtreecommitdiff
path: root/bdb/hash
diff options
context:
space:
mode:
authorunknown <ram@mysql.r18.ru>2002-10-30 15:57:05 +0400
committerunknown <ram@mysql.r18.ru>2002-10-30 15:57:05 +0400
commit155e78f014de1a2e259ae5119f4621fbb210a784 (patch)
tree6881a3cca88bea0bb9eeffd5aae34be437152786 /bdb/hash
parentb8798d25ab71436bf690ee8ae48285a655c5487e (diff)
downloadmariadb-git-155e78f014de1a2e259ae5119f4621fbb210a784.tar.gz
BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f: Delete: bdb/build_vxworks/ex_access/ex_access.wpj BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6: Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd: Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74: Delete: bdb/build_vxworks/ex_env/ex_env.wpj BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d: Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41: Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682: Delete: bdb/build_win32/db_buildall.dsp BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed: Delete: bdb/cxx/cxx_app.cpp BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952: Delete: bdb/cxx/cxx_log.cpp BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556: Delete: bdb/cxx/cxx_table.cpp BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd: Delete: bdb/cxx/namemap.txt BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373: Delete: bdb/db/Design.fileop BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef: Delete: bdb/db185/db185_int.h BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5: Delete: bdb/dist/acconfig.h BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1: Delete: bdb/dist/aclocal/mutex.m4 BitKeeper/deleted/.del-options.m4~b9d0ca637213750a: Delete: bdb/dist/aclocal/options.m4 BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30: Delete: bdb/dist/aclocal/programs.m4 BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db: Delete: bdb/dist/aclocal/tcl.m4 BitKeeper/deleted/.del-types.m4~59cae158c9a32cff: Delete: bdb/dist/aclocal/types.m4 BitKeeper/deleted/.del-script~d38f6d3a4f159cb4: Delete: bdb/dist/build/script BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c: Delete: bdb/dist/configure.in BitKeeper/deleted/.del-ltconfig~66bbd007d8024af: Delete: bdb/dist/ltconfig BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a: 
Delete: bdb/dist/rec_ctemp BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f: Delete: bdb/dist/s_tcl BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15: Delete: bdb/dist/template/.IGNORE_ME BitKeeper/deleted/.del-btree.h~179f2aefec1753d: Delete: bdb/include/btree.h BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8: Delete: bdb/include/cxx_int.h BitKeeper/deleted/.del-db.src~6b433ae615b16a8d: Delete: bdb/include/db.src BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c: Delete: bdb/include/db_185.h BitKeeper/deleted/.del-db_am.h~a714912b6b75932f: Delete: bdb/include/db_am.h BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9: Delete: bdb/include/db_cxx.h BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904: Delete: bdb/include/db_dispatch.h BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7: Delete: bdb/include/db_int.src BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a: Delete: bdb/include/db_join.h BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc: Delete: bdb/include/db_page.h BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab: Delete: bdb/include/db_server_int.h BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3: Delete: bdb/include/db_shash.h BitKeeper/deleted/.del-db_swap.h~1e60887550864a59: Delete: bdb/include/db_swap.h BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d: Delete: bdb/include/db_upgrade.h BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e: Delete: bdb/include/db_verify.h BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc: Delete: bdb/include/debug.h BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb: Delete: bdb/include/hash.h BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f: Delete: bdb/include/lock.h BitKeeper/deleted/.del-log.h~ff20184238e35e4d: Delete: bdb/include/log.h BitKeeper/deleted/.del-mp.h~7e317597622f3411: Delete: bdb/include/mp.h BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137: Delete: bdb/include/mutex.h BitKeeper/deleted/.del-os.h~91867cc8757cd0e3: Delete: bdb/include/os.h BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be: Delete: 
bdb/include/os_jump.h BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597: Delete: bdb/include/qam.h BitKeeper/deleted/.del-queue.h~4c72c0826c123d5: Delete: bdb/include/queue.h BitKeeper/deleted/.del-region.h~513fe04d977ca0fc: Delete: bdb/include/region.h BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36: Delete: bdb/include/shqueue.h BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f: Delete: bdb/include/tcl_db.h BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4: Delete: bdb/include/txn.h BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a: Delete: bdb/include/xa.h BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023: Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d: Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b: Delete: bdb/lock/lock_conflict.c BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2: Delete: bdb/log/log.src BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f: Delete: bdb/log/log_findckp.c BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297: Delete: bdb/log/log_rec.c BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15: Delete: bdb/log/log_register.c BitKeeper/deleted/.del-Design~b3d0f179f2767b: Delete: bdb/mp/Design BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c: Delete: bdb/os/os_finit.c BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924: Delete: bdb/os_vxworks/os_abs.c BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122: Delete: bdb/os_vxworks/os_finit.c BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3: Delete: bdb/os_vxworks/os_map.c BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad: Delete: bdb/os_win32/os_finit.c BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d: Delete: bdb/patches/log-corruption.patch BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm 
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs BitKeeper/deleted/.del-Changes~729c1891efa60de9: Delete: bdb/perl.BerkeleyDB/Changes BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0: Delete: bdb/perl.BerkeleyDB/MANIFEST BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a: Delete: bdb/perl.BerkeleyDB/Makefile.PL BitKeeper/deleted/.del-README~5f2f579b1a241407: Delete: bdb/perl.BerkeleyDB/README BitKeeper/deleted/.del-Todo~dca3c66c193adda9: Delete: bdb/perl.BerkeleyDB/Todo BitKeeper/deleted/.del-config.in~ae81681e450e0999: Delete: bdb/perl.BerkeleyDB/config.in BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e: Delete: bdb/perl.BerkeleyDB/dbinfo BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e: Delete: bdb/perl.BerkeleyDB/mkconsts BitKeeper/deleted/.del-mkpod~182c0ca54e439afb: Delete: bdb/perl.BerkeleyDB/mkpod BitKeeper/deleted/.del-5.004~e008cb5a48805543: Delete: bdb/perl.BerkeleyDB/patches/5.004 BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8: Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl BitKeeper/deleted/.del-solaris.pl~6771e7182394e152: Delete: bdb/perl.BerkeleyDB/hints/solaris.pl BitKeeper/deleted/.del-typemap~783b8f5295b05f3d: Delete: bdb/perl.BerkeleyDB/typemap BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc: Delete: bdb/perl.BerkeleyDB/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6: Delete: bdb/perl.BerkeleyDB/patches/5.004_02 BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f: Delete: bdb/perl.BerkeleyDB/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~e326cb51af09d154: Delete: bdb/perl.BerkeleyDB/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe: Delete: 
bdb/perl.BerkeleyDB/patches/5.004_05 BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b: Delete: bdb/perl.BerkeleyDB/patches/5.005 BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e: Delete: bdb/perl.BerkeleyDB/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb: Delete: bdb/perl.BerkeleyDB/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a: Delete: bdb/perl.BerkeleyDB/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22: Delete: bdb/perl.BerkeleyDB/patches/5.6.0 BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406: Delete: bdb/perl.BerkeleyDB/t/btree.t BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2: Delete: bdb/perl.BerkeleyDB/t/db-3.0.t BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018: Delete: bdb/perl.BerkeleyDB/t/db-3.1.t BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf: Delete: bdb/perl.BerkeleyDB/t/db-3.2.t BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd: Delete: bdb/perl.BerkeleyDB/t/destroy.t BitKeeper/deleted/.del-env.t~a8604a4499c4bd07: Delete: bdb/perl.BerkeleyDB/t/env.t BitKeeper/deleted/.del-examples.t~2571b77c3cc75574: Delete: bdb/perl.BerkeleyDB/t/examples.t BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88: Delete: bdb/perl.BerkeleyDB/t/examples.t.T BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d: Delete: bdb/perl.BerkeleyDB/t/examples3.t.T BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83: Delete: bdb/perl.BerkeleyDB/t/examples3.t BitKeeper/deleted/.del-filter.t~f87b045c1b708637: Delete: bdb/perl.BerkeleyDB/t/filter.t BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3: Delete: bdb/perl.BerkeleyDB/t/hash.t BitKeeper/deleted/.del-join.t~29fc39f74a83ca22: Delete: bdb/perl.BerkeleyDB/t/join.t BitKeeper/deleted/.del-mldbm.t~31f5015341eea040: Delete: bdb/perl.BerkeleyDB/t/mldbm.t BitKeeper/deleted/.del-queue.t~8f338034ce44a641: Delete: bdb/perl.BerkeleyDB/t/queue.t BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e: Delete: bdb/perl.BerkeleyDB/t/recno.t BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d: Delete: 
bdb/perl.BerkeleyDB/t/strict.t BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603: Delete: bdb/perl.BerkeleyDB/t/subdb.t BitKeeper/deleted/.del-txn.t~65033b8558ae1216: Delete: bdb/perl.BerkeleyDB/t/txn.t BitKeeper/deleted/.del-unknown.t~f3710458682665e1: Delete: bdb/perl.BerkeleyDB/t/unknown.t BitKeeper/deleted/.del-Changes~436f74a5c414c65b: Delete: bdb/perl.DB_File/Changes BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82: Delete: bdb/perl.DB_File/DB_File.pm BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8: Delete: bdb/perl.DB_File/DB_File.xs BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069: Delete: bdb/perl.DB_File/DB_File_BS BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac: Delete: bdb/perl.DB_File/MANIFEST BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38: Delete: bdb/perl.DB_File/Makefile.PL BitKeeper/deleted/.del-README~77e924a5a9bae6b3: Delete: bdb/perl.DB_File/README BitKeeper/deleted/.del-config.in~ab4c2792b86a810b: Delete: bdb/perl.DB_File/config.in BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb: Delete: bdb/perl.DB_File/dbinfo BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9: Delete: bdb/perl.DB_File/hints/dynixptx.pl BitKeeper/deleted/.del-typemap~55cffb3288a9e587: Delete: bdb/perl.DB_File/typemap BitKeeper/deleted/.del-version.c~a4df0e646f8b3975: Delete: bdb/perl.DB_File/version.c BitKeeper/deleted/.del-5.004_01~d6830d0082702af7: Delete: bdb/perl.DB_File/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~78b082dc80c91031: Delete: bdb/perl.DB_File/patches/5.004_02 BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b: Delete: bdb/perl.DB_File/patches/5.004 BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe: Delete: bdb/perl.DB_File/hints/sco.pl BitKeeper/deleted/.del-5.004_03~33f274648b160d95: Delete: bdb/perl.DB_File/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a: Delete: bdb/perl.DB_File/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142: Delete: bdb/perl.DB_File/patches/5.004_05 BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951: 
Delete: bdb/perl.DB_File/patches/5.005 BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa: Delete: bdb/perl.DB_File/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522: Delete: bdb/perl.DB_File/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea: Delete: bdb/perl.DB_File/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85: Delete: bdb/perl.DB_File/patches/5.6.0 BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3: Delete: bdb/perl.DB_File/t/db-btree.t BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2: Delete: bdb/perl.DB_File/t/db-hash.t BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50: Delete: bdb/perl.DB_File/t/db-recno.t BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2: Delete: bdb/rpc_server/db_server.sed BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4: Delete: bdb/rpc_server/db_server_proc.c BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607: Delete: bdb/rpc_server/db_server_svc.sed BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda: Delete: bdb/rpc_server/db_server_util.c BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7: Delete: bdb/test/log.tcl BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c: Delete: bdb/test/mpool.tcl BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565: Delete: bdb/test/mutex.tcl BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e: Delete: bdb/test/txn.tcl BitKeeper/deleted/.del-README~e800a12a5392010a: Delete: bdb/test/upgrade/README BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98: Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b: Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
Diffstat (limited to 'bdb/hash')
-rw-r--r--bdb/hash/hash.c1386
-rw-r--r--bdb/hash/hash.src219
-rw-r--r--bdb/hash/hash_conv.c32
-rw-r--r--bdb/hash/hash_dup.c396
-rw-r--r--bdb/hash/hash_func.c11
-rw-r--r--bdb/hash/hash_meta.c56
-rw-r--r--bdb/hash/hash_method.c12
-rw-r--r--bdb/hash/hash_open.c558
-rw-r--r--bdb/hash/hash_page.c799
-rw-r--r--bdb/hash/hash_rec.c500
-rw-r--r--bdb/hash/hash_reclaim.c59
-rw-r--r--bdb/hash/hash_stat.c137
-rw-r--r--bdb/hash/hash_upgrade.c23
-rw-r--r--bdb/hash/hash_verify.c238
14 files changed, 2673 insertions, 1753 deletions
diff --git a/bdb/hash/hash.c b/bdb/hash/hash.c
index e96fd4898f0..2f972a3238d 100644
--- a/bdb/hash/hash.c
+++ b/bdb/hash/hash.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $";
+static const char revid[] = "$Id: hash.c,v 11.166 2002/08/06 06:11:25 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -54,446 +54,70 @@ static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_am.h"
-#include "db_ext.h"
-#include "db_shash.h"
-#include "db_swap.h"
-#include "hash.h"
-#include "btree.h"
-#include "log.h"
-#include "lock.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
+#include "dbinc/lock.h"
+static int __ham_bulk __P((DBC *, DBT *, u_int32_t));
static int __ham_c_close __P((DBC *, db_pgno_t, int *));
static int __ham_c_del __P((DBC *));
static int __ham_c_destroy __P((DBC *));
static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
static int __ham_c_writelock __P((DBC *));
-static int __ham_del_dups __P((DBC *, DBT *));
-static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
static int __ham_dup_return __P((DBC *, DBT *, u_int32_t));
static int __ham_expand_table __P((DBC *));
-static int __ham_init_htab __P((DBC *,
- const char *, db_pgno_t, u_int32_t, u_int32_t));
static int __ham_lookup __P((DBC *,
const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *));
static int __ham_overwrite __P((DBC *, DBT *, u_int32_t));
/*
- * __ham_metachk --
+ * __ham_quick_delete --
+ * When performing a DB->del operation that does not involve secondary
+ * indices and is not removing an off-page duplicate tree, we can
+ * speed things up substantially by removing the entire duplicate
+ * set, if any is present, in one operation, rather than by conjuring
+ * up and deleting each of the items individually. (All are stored
+ * in one big HKEYDATA structure.) We don't bother to distinguish
+ * on-page duplicate sets from single, non-dup items; they're deleted
+ * in exactly the same way.
*
- * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *));
- */
-int
-__ham_metachk(dbp, name, hashm)
- DB *dbp;
- const char *name;
- HMETA *hashm;
-{
- DB_ENV *dbenv;
- u_int32_t vers;
- int ret;
-
- dbenv = dbp->dbenv;
-
- /*
- * At this point, all we know is that the magic number is for a Hash.
- * Check the version, the database may be out of date.
- */
- vers = hashm->dbmeta.version;
- if (F_ISSET(dbp, DB_AM_SWAP))
- M_32_SWAP(vers);
- switch (vers) {
- case 4:
- case 5:
- case 6:
- __db_err(dbenv,
- "%s: hash version %lu requires a version upgrade",
- name, (u_long)vers);
- return (DB_OLD_VERSION);
- case 7:
- break;
- default:
- __db_err(dbenv,
- "%s: unsupported hash version: %lu", name, (u_long)vers);
- return (EINVAL);
- }
-
- /* Swap the page if we need to. */
- if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0)
- return (ret);
-
- /* Check the type. */
- if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN)
- return (EINVAL);
- dbp->type = DB_HASH;
- DB_ILLEGAL_METHOD(dbp, DB_OK_HASH);
-
- /*
- * Check application info against metadata info, and set info, flags,
- * and type based on metadata info.
- */
- if ((ret = __db_fchk(dbenv,
- "DB->open", hashm->dbmeta.flags,
- DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0)
- return (ret);
-
- if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP))
- F_SET(dbp, DB_AM_DUP);
- else
- if (F_ISSET(dbp, DB_AM_DUP)) {
- __db_err(dbenv,
- "%s: DB_DUP specified to open method but not set in database",
- name);
- return (EINVAL);
- }
-
- if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB))
- F_SET(dbp, DB_AM_SUBDB);
- else
- if (F_ISSET(dbp, DB_AM_SUBDB)) {
- __db_err(dbenv,
- "%s: multiple databases specified but not supported in file",
- name);
- return (EINVAL);
- }
-
- if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) {
- if (dbp->dup_compare == NULL)
- dbp->dup_compare = __bam_defcmp;
- } else
- if (dbp->dup_compare != NULL) {
- __db_err(dbenv,
- "%s: duplicate sort function specified but not set in database",
- name);
- return (EINVAL);
- }
-
- /* Set the page size. */
- dbp->pgsize = hashm->dbmeta.pagesize;
-
- /* Copy the file's ID. */
- memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);
-
- return (0);
-}
-
-/*
- * __ham_open --
+ * This function is called by __db_delete when the appropriate
+ * conditions are met, and it performs the delete in the optimized way.
*
- * PUBLIC: int __ham_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * The cursor should be set to the first item in the duplicate
+ * set, or to the sole key/data pair when the key does not have a
+ * duplicate set, before the function is called.
+ *
+ * PUBLIC: int __ham_quick_delete __P((DBC *));
*/
int
-__ham_open(dbp, name, base_pgno, flags)
- DB *dbp;
- const char *name;
- db_pgno_t base_pgno;
- u_int32_t flags;
-{
- DB_ENV *dbenv;
- DBC *dbc;
- HASH_CURSOR *hcp;
- HASH *hashp;
- int need_sync, ret, t_ret;
-
- dbc = NULL;
- dbenv = dbp->dbenv;
- need_sync = 0;
-
- /* Initialize the remaining fields/methods of the DB. */
- dbp->del = __ham_delete;
- dbp->stat = __ham_stat;
-
- /*
- * Get a cursor. If DB_CREATE is specified, we may be creating
- * pages, and to do that safely in CDB we need a write cursor.
- * In STD_LOCKING mode, we'll synchronize using the meta page
- * lock instead.
- */
- if ((ret = dbp->cursor(dbp,
- dbp->open_txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ?
- DB_WRITECURSOR : 0)) != 0)
- return (ret);
-
- hcp = (HASH_CURSOR *)dbc->internal;
- hashp = dbp->h_internal;
- hashp->meta_pgno = base_pgno;
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto err1;
-
- /*
- * If this is a new file, initialize it, and put it back dirty.
- *
- * Initialize the hdr structure.
- */
- if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) {
- /* File exists, verify the data in the header. */
- if (hashp->h_hash == NULL)
- hashp->h_hash = hcp->hdr->dbmeta.version < 5
- ? __ham_func4 : __ham_func5;
- if (!F_ISSET(dbp, DB_RDONLY) &&
- hashp->h_hash(dbp,
- CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) {
- __db_err(dbp->dbenv,
- "hash: incompatible hash function");
- ret = EINVAL;
- goto err2;
- }
- if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP))
- F_SET(dbp, DB_AM_DUP);
- if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT))
- F_SET(dbp, DB_AM_DUPSORT);
- if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB))
- F_SET(dbp, DB_AM_SUBDB);
- } else if (!IS_RECOVERING(dbenv)) {
- /*
- * File does not exist, we must initialize the header. If
- * locking is enabled that means getting a write lock first.
- * During recovery the meta page will be in the log.
- */
- dbc->lock.pgno = base_pgno;
-
- if (STD_LOCKING(dbc) &&
- ((ret = lock_put(dbenv, &hcp->hlock)) != 0 ||
- (ret = lock_get(dbenv, dbc->locker,
- DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0,
- &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0))
- goto err2;
- else if (CDB_LOCKING(dbp->dbenv)) {
- DB_ASSERT(LF_ISSET(DB_CREATE));
- if ((ret = lock_get(dbenv, dbc->locker,
- DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
- &dbc->mylock)) != 0)
- goto err2;
- }
- if ((ret = __ham_init_htab(dbc, name,
- base_pgno, hashp->h_nelem, hashp->h_ffactor)) != 0)
- goto err2;
-
- need_sync = 1;
- }
-
-err2: /* Release the meta data page */
- if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
-err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Sync the file so that we know that the meta data goes to disk. */
- if (ret == 0 && need_sync)
- ret = dbp->sync(dbp, 0);
-#if CONFIG_TEST
- if (ret == 0)
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
-
-DB_TEST_RECOVERY_LABEL
-#endif
- if (ret != 0)
- (void)__ham_db_close(dbp);
-
- return (ret);
-}
-
-/************************** LOCAL CREATION ROUTINES **********************/
-/*
- * Returns 0 on No Error
- */
-static int
-__ham_init_htab(dbc, name, pgno, nelem, ffactor)
+__ham_quick_delete(dbc)
DBC *dbc;
- const char *name;
- db_pgno_t pgno;
- u_int32_t nelem, ffactor;
{
- DB *dbp;
- DB_LOCK metalock;
- DB_LSN orig_lsn;
- DBMETA *mmeta;
- HASH_CURSOR *hcp;
- HASH *hashp;
- PAGE *h;
- db_pgno_t mpgno;
- int32_t l2, nbuckets;
- int dirty_mmeta, i, ret, t_ret;
-
- hcp = (HASH_CURSOR *)dbc->internal;
- dbp = dbc->dbp;
- hashp = dbp->h_internal;
- mmeta = NULL;
- h = NULL;
- ret = 0;
- dirty_mmeta = 0;
- metalock.off = LOCK_INVALID;
-
- if (hashp->h_hash == NULL)
- hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5;
-
- if (nelem != 0 && ffactor != 0) {
- nelem = (nelem - 1) / ffactor + 1;
- l2 = __db_log2(nelem > 2 ? nelem : 2);
- } else
- l2 = 1;
- nbuckets = 1 << l2;
-
- orig_lsn = hcp->hdr->dbmeta.lsn;
- memset(hcp->hdr, 0, sizeof(HMETA));
- ZERO_LSN(hcp->hdr->dbmeta.lsn);
- hcp->hdr->dbmeta.pgno = pgno;
- hcp->hdr->dbmeta.magic = DB_HASHMAGIC;
- hcp->hdr->dbmeta.version = DB_HASHVERSION;
- hcp->hdr->dbmeta.pagesize = dbp->pgsize;
- hcp->hdr->dbmeta.type = P_HASHMETA;
- hcp->hdr->dbmeta.free = PGNO_INVALID;
- hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1;
- hcp->hdr->low_mask = (nbuckets >> 1) - 1;
- hcp->hdr->ffactor = ffactor;
- hcp->hdr->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY));
- memcpy(hcp->hdr->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
-
- if (F_ISSET(dbp, DB_AM_DUP))
- F_SET(&hcp->hdr->dbmeta, DB_HASH_DUP);
- if (F_ISSET(dbp, DB_AM_SUBDB))
- F_SET(&hcp->hdr->dbmeta, DB_HASH_SUBDB);
- if (dbp->dup_compare != NULL)
- F_SET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT);
-
- if ((ret = memp_fset(dbp->mpf, hcp->hdr, DB_MPOOL_DIRTY)) != 0)
- goto err;
-
- /*
- * Create the first and second buckets pages so that we have the
- * page numbers for them and we can store that page number
- * in the meta-data header (spares[0]).
- */
- hcp->hdr->spares[0] = nbuckets;
- if ((ret = memp_fget(dbp->mpf,
- &hcp->hdr->spares[0], DB_MPOOL_NEW_GROUP, &h)) != 0)
- goto err;
-
- P_INIT(h, dbp->pgsize, hcp->hdr->spares[0], PGNO_INVALID,
- PGNO_INVALID, 0, P_HASH);
-
- /* Fill in the last fields of the meta data page. */
- hcp->hdr->spares[0] -= (nbuckets - 1);
- for (i = 1; i <= l2; i++)
- hcp->hdr->spares[i] = hcp->hdr->spares[0];
- for (; i < NCACHED; i++)
- hcp->hdr->spares[i] = PGNO_INVALID;
-
- /*
- * Before we are about to put any dirty pages, we need to log
- * the meta-data page create.
- */
- ret = __db_log_page(dbp, name, &orig_lsn, pgno, (PAGE *)hcp->hdr);
-
- if (dbp->open_txn != NULL) {
- mmeta = (DBMETA *) hcp->hdr;
- if (F_ISSET(dbp, DB_AM_SUBDB)) {
-
- /*
- * If this is a subdatabase, then we need to
- * get the LSN off the master meta data page
- * because that's where free pages are linked
- * and during recovery we need to access
- * that page and roll it backward/forward
- * correctly with respect to LSN.
- */
- mpgno = PGNO_BASE_MD;
- if ((ret = __db_lget(dbc,
- 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- if ((ret = memp_fget(dbp->mpf,
- &mpgno, 0, (PAGE **)&mmeta)) != 0)
- goto err;
- }
- if ((t_ret = __ham_groupalloc_log(dbp->dbenv,
- dbp->open_txn, &LSN(mmeta), 0, dbp->log_fileid,
- &LSN(mmeta), hcp->hdr->spares[0],
- hcp->hdr->max_bucket + 1, mmeta->free)) != 0 && ret == 0)
- ret = t_ret;
- if (ret == 0) {
- /* need to update real LSN for buffer manager */
- dirty_mmeta = 1;
- }
-
- }
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
-
-DB_TEST_RECOVERY_LABEL
-err: if (h != NULL &&
- (t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0 && ret == 0)
- ret = t_ret;
-
- if (F_ISSET(dbp, DB_AM_SUBDB) && mmeta != NULL)
- if ((t_ret = memp_fput(dbp->mpf, mmeta,
- dirty_mmeta ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0)
- ret = t_ret;
- if (metalock.off != LOCK_INVALID)
- (void)__TLPUT(dbc, metalock);
-
- return (ret);
-}
-
-static int
-__ham_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- DBC *dbc;
- HASH_CURSOR *hcp;
- db_pgno_t pgno;
int ret, t_ret;
- /*
- * This is the only access method routine called directly from
- * the dbp, so we have to do error checking.
- */
-
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
- DB_CHECK_TXN(dbp, txn);
-
- if ((ret =
- __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
+ if ((ret = __ham_get_meta(dbc)) != 0)
return (ret);
- DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags);
+ /* Assert that we're not using secondary indices. */
+ DB_ASSERT(!F_ISSET(dbc->dbp, DB_AM_SECONDARY));
+ /*
+ * We should assert that we're not a primary either, but that
+ * would require grabbing the dbp's mutex, so we don't bother.
+ */
- hcp = (HASH_CURSOR *)dbc->internal;
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
+ /* Assert that we're set, but not to an off-page duplicate. */
+ DB_ASSERT(IS_INITIALIZED(dbc));
+ DB_ASSERT(((HASH_CURSOR *)dbc->internal)->opd == NULL);
- pgno = PGNO_INVALID;
- if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE, &pgno)) == 0) {
- if (F_ISSET(hcp, H_OK)) {
- if (pgno == PGNO_INVALID)
- ret = __ham_del_pair(dbc, 1);
- else {
- /* When we close the cursor in __ham_del_dups,
- * that will make the off-page dup tree go
- * go away as well as our current entry. When
- * it updates cursors, ours should get marked
- * as H_DELETED.
- */
- ret = __ham_del_dups(dbc, key);
- }
- } else
- ret = DB_NOTFOUND;
- }
+ ret = __ham_del_pair(dbc, 1);
if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
ret = t_ret;
-out: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
return (ret);
}
@@ -517,8 +141,8 @@ __ham_c_init(dbc)
1, sizeof(struct cursor_t), &new_curs)) != 0)
return (ret);
if ((ret = __os_malloc(dbenv,
- dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) {
- __os_free(new_curs, sizeof(*new_curs));
+ dbc->dbp->pgsize, &new_curs->split_buf)) != 0) {
+ __os_free(dbenv, new_curs);
return (ret);
}
@@ -527,8 +151,10 @@ __ham_c_init(dbc)
dbc->c_count = __db_c_count;
dbc->c_del = __db_c_del;
dbc->c_dup = __db_c_dup;
- dbc->c_get = __db_c_get;
+ dbc->c_get = dbc->c_real_get = __db_c_get;
+ dbc->c_pget = __db_c_pget;
dbc->c_put = __db_c_put;
+ dbc->c_am_bulk = __ham_bulk;
dbc->c_am_close = __ham_c_close;
dbc->c_am_del = __ham_c_del;
dbc->c_am_destroy = __ham_c_destroy;
@@ -551,12 +177,14 @@ __ham_c_close(dbc, root_pgno, rmroot)
db_pgno_t root_pgno;
int *rmroot;
{
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
HKEYDATA *dp;
int doroot, gotmeta, ret, t_ret;
u_int32_t dirty;
COMPQUIET(rmroot, 0);
+ mpf = dbc->dbp->mpf;
dirty = 0;
doroot = gotmeta = ret = 0;
hcp = (HASH_CURSOR *) dbc->internal;
@@ -568,9 +196,14 @@ __ham_c_close(dbc, root_pgno, rmroot)
gotmeta = 1;
if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0)
goto out;
- dp = (HKEYDATA *)H_PAIRDATA(hcp->page, hcp->indx);
- DB_ASSERT(HPAGE_PTYPE(dp) == H_OFFDUP);
- memcpy(&root_pgno, HOFFPAGE_PGNO(dp), sizeof(db_pgno_t));
+ dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx);
+
+ /* If its not a dup we aborted before we changed it. */
+ if (HPAGE_PTYPE(dp) == H_OFFDUP)
+ memcpy(&root_pgno,
+ HOFFPAGE_PGNO(dp), sizeof(db_pgno_t));
+ else
+ root_pgno = PGNO_INVALID;
if ((ret =
hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0)
@@ -583,7 +216,7 @@ __ham_c_close(dbc, root_pgno, rmroot)
}
out: if (hcp->page != NULL && (t_ret =
- memp_fput(dbc->dbp->mpf, hcp->page, dirty)) != 0 && ret == 0)
+ mpf->put(mpf, hcp->page, dirty)) != 0 && ret == 0)
ret = t_ret;
if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
ret = t_ret;
@@ -605,8 +238,8 @@ __ham_c_destroy(dbc)
hcp = (HASH_CURSOR *)dbc->internal;
if (hcp->split_buf != NULL)
- __os_free(hcp->split_buf, dbc->dbp->pgsize);
- __os_free(hcp, sizeof(HASH_CURSOR));
+ __os_free(dbc->dbp->dbenv, hcp->split_buf);
+ __os_free(dbc->dbp->dbenv, hcp);
return (0);
}
@@ -623,6 +256,7 @@ __ham_c_count(dbc, recnop)
db_recno_t *recnop;
{
DB *dbp;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
db_indx_t len;
db_recno_t recno;
@@ -630,22 +264,23 @@ __ham_c_count(dbc, recnop)
u_int8_t *p, *pend;
dbp = dbc->dbp;
- hcp = (HASH_CURSOR *) dbc->internal;
+ mpf = dbp->mpf;
+ hcp = (HASH_CURSOR *)dbc->internal;
recno = 0;
if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0)
return (ret);
- switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) {
+ switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
case H_KEYDATA:
case H_OFFPAGE:
recno = 1;
break;
case H_DUPLICATE:
- p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
+ p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
pend = p +
- LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx);
+ LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
for (; p < pend; recno++) {
/* p may be odd, so copy rather than just dereffing */
memcpy(&len, p, sizeof(db_indx_t));
@@ -654,14 +289,13 @@ __ham_c_count(dbc, recnop)
break;
default:
- ret = __db_unknown_type(dbp->dbenv, "__ham_c_count",
- HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx)));
+ ret = __db_pgfmt(dbp->dbenv, hcp->pgno);
goto err;
}
*recnop = recno;
-err: if ((t_ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0 && ret == 0)
+err: if ((t_ret = mpf->put(mpf, hcp->page, 0)) != 0 && ret == 0)
ret = t_ret;
hcp->page = NULL;
return (ret);
@@ -673,10 +307,12 @@ __ham_c_del(dbc)
{
DB *dbp;
DBT repldbt;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_DELETED))
@@ -689,12 +325,12 @@ __ham_c_del(dbc)
goto out;
/* Off-page duplicates. */
- if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP)
+ if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP)
goto out;
if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */
if (hcp->dup_off == 0 &&
- DUP_SIZE(hcp->dup_len) == LEN_HDATA(hcp->page,
+ DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page,
hcp->hdr->dbmeta.pagesize, hcp->indx))
ret = __ham_del_pair(dbc, 1);
else {
@@ -703,21 +339,25 @@ __ham_c_del(dbc)
repldbt.doff = hcp->dup_off;
repldbt.dlen = DUP_SIZE(hcp->dup_len);
repldbt.size = 0;
- repldbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page,
+ repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
hcp->indx));
- ret = __ham_replpair(dbc, &repldbt, 0);
- hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
- F_SET(hcp, H_DELETED);
- ret = __ham_c_update(dbc, DUP_SIZE(hcp->dup_len), 0, 1);
+ if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) {
+ hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
+ F_SET(hcp, H_DELETED);
+ ret = __ham_c_update(dbc,
+ DUP_SIZE(hcp->dup_len), 0, 1);
+ }
}
} else /* Not a duplicate */
ret = __ham_del_pair(dbc, 1);
-out: if (ret == 0 && hcp->page != NULL &&
- (t_ret = memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0)
- ret = t_ret;
- hcp->page = NULL;
+out: if (hcp->page != NULL) {
+ if ((t_ret = mpf->put(mpf,
+ hcp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) && ret == 0)
+ ret = t_ret;
+ hcp->page = NULL;
+ }
if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
ret = t_ret;
return (ret);
@@ -760,7 +400,7 @@ __ham_c_dup(orig_dbc, new_dbc)
* holds a lock of the correct type, so if we need a write lock and
* request it, we know that we'll get it.
*/
- if (orig->lock.off == LOCK_INVALID || orig_dbc->txn != NULL)
+ if (!LOCK_ISSET(orig->lock) || orig_dbc->txn != NULL)
return (0);
return (__ham_lock_bucket(new_dbc, DB_LOCK_READ));
@@ -775,12 +415,14 @@ __ham_c_get(dbc, key, data, flags, pgnop)
db_pgno_t *pgnop;
{
DB *dbp;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
db_lockmode_t lock_type;
int get_key, ret, t_ret;
hcp = (HASH_CURSOR *)dbc->internal;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
/* Clear OR'd in additional bits so we can check for flag equality. */
if (F_ISSET(dbc, DBC_RMW))
@@ -827,6 +469,7 @@ __ham_c_get(dbc, key, data, flags, pgnop)
case DB_SET:
case DB_SET_RANGE:
case DB_GET_BOTH:
+ case DB_GET_BOTH_RANGE:
ret = __ham_lookup(dbc, key, 0, lock_type, pgnop);
get_key = 0;
break;
@@ -856,11 +499,11 @@ __ham_c_get(dbc, key, data, flags, pgnop)
goto err;
else if (F_ISSET(hcp, H_OK)) {
if (*pgnop == PGNO_INVALID)
- ret = __ham_dup_return (dbc, data, flags);
+ ret = __ham_dup_return(dbc, data, flags);
break;
} else if (!F_ISSET(hcp, H_NOMORE)) {
__db_err(dbp->dbenv,
- "H_NOMORE returned to __ham_c_get");
+ "H_NOMORE returned to __ham_c_get");
ret = EINVAL;
break;
}
@@ -872,7 +515,7 @@ __ham_c_get(dbc, key, data, flags, pgnop)
case DB_LAST:
case DB_PREV:
case DB_PREV_NODUP:
- ret = memp_fput(dbp->mpf, hcp->page, 0);
+ ret = mpf->put(mpf, hcp->page, 0);
hcp->page = NULL;
if (hcp->bucket == 0) {
ret = DB_NOTFOUND;
@@ -890,7 +533,7 @@ __ham_c_get(dbc, key, data, flags, pgnop)
case DB_FIRST:
case DB_NEXT:
case DB_NEXT_NODUP:
- ret = memp_fput(dbp->mpf, hcp->page, 0);
+ ret = mpf->put(mpf, hcp->page, 0);
hcp->page = NULL;
hcp->indx = NDX_INVALID;
hcp->bucket++;
@@ -907,6 +550,7 @@ __ham_c_get(dbc, key, data, flags, pgnop)
break;
case DB_GET_BOTH:
case DB_GET_BOTHC:
+ case DB_GET_BOTH_RANGE:
case DB_NEXT_DUP:
case DB_SET:
case DB_SET_RANGE:
@@ -940,6 +584,382 @@ err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
return (ret);
}
+/*
+ * __ham_bulk -- Return bulk data from a hash table.
+ */
+static int
+__ham_bulk(dbc, data, flags)
+ DBC *dbc;
+ DBT *data;
+ u_int32_t flags;
+{
+ DB *dbp;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *cp;
+ PAGE *pg;
+ db_indx_t dup_len, dup_off, dup_tlen, indx, *inp;
+ db_lockmode_t lock_mode;
+ db_pgno_t pgno;
+ int32_t *endp, key_off, *offp, *saveoff;
+ u_int32_t key_size, size, space;
+ u_int8_t *dbuf, *dp, *hk, *np, *tmp;
+ int is_dup, is_key;
+ int need_pg, next_key, no_dup, pagesize, ret, t_ret;
+
+ ret = 0;
+ key_off = 0;
+ dup_len = dup_off = dup_tlen = 0;
+ size = 0;
+ dbp = dbc->dbp;
+ pagesize = dbp->pgsize;
+ mpf = dbp->mpf;
+ cp = (HASH_CURSOR *)dbc->internal;
+ is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
+ next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
+ no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
+ dbuf = data->data;
+ np = dp = dbuf;
+
+	/* Keep track of space that is left. There is a termination entry */
+ space = data->ulen;
+ space -= sizeof(*offp);
+
+ /* Build the offset/size table from the end up. */
+ endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen);
+ endp--;
+ offp = endp;
+
+ key_size = 0;
+ lock_mode = F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE: DB_LOCK_READ;
+
+next_pg:
+ need_pg = 1;
+ indx = cp->indx;
+ pg = cp->page;
+ inp = P_INP(dbp, pg);
+
+ do {
+ if (is_key) {
+ hk = H_PAIRKEY(dbp, pg, indx);
+ if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
+ memcpy(&key_size,
+ HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
+ memcpy(&pgno,
+ HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
+ size = key_size;
+ if (key_size > space)
+ goto get_key_space;
+ if ((ret = __bam_bulk_overflow(
+ dbc, key_size, pgno, np)) != 0)
+ return (ret);
+ space -= key_size;
+ key_off = (int32_t)(np - dbuf);
+ np += key_size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+get_key_space:
+ if (offp == endp) {
+ data->size =
+ ALIGN(size +
+ pagesize,
+ sizeof(u_int32_t));
+ return (ENOMEM);
+ }
+ goto back_up;
+ }
+ memcpy(dp,
+ (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ key_size = LEN_HKEY(dbp, pg, pagesize, indx);
+ key_off = (int32_t)(inp[indx] - HOFFSET(pg)
+ + dp - dbuf + SSZA(HKEYDATA, data));
+ }
+ }
+
+ hk = H_PAIRDATA(dbp, pg, indx);
+ switch (HPAGE_PTYPE(hk)) {
+ case H_DUPLICATE:
+ case H_KEYDATA:
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+back_up:
+ if (indx != 0) {
+ indx -= 2;
+ /* XXX
+ * It's not clear that this is
+ * the right way to fix this,
+ * but here goes.
+ * If we are backing up onto a
+ * duplicate, then we need to
+ * position ourselves at the
+ * end of the duplicate set.
+ * We probably need to make
+ * this work for H_OFFDUP too.
+ * It might be worth making a
+ * dummy cursor and calling
+ * __ham_item_prev.
+ */
+ tmp = H_PAIRDATA(dbp, pg, indx);
+ if (HPAGE_PTYPE(tmp) ==
+ H_DUPLICATE) {
+ dup_off = dup_tlen =
+ LEN_HDATA(dbp, pg,
+ pagesize, indx + 1);
+ memcpy(&dup_len,
+ HKEYDATA_DATA(tmp),
+ sizeof(db_indx_t));
+ }
+ goto get_space;
+ }
+ /* indx == 0 */
+ if ((ret = __ham_item_prev(dbc,
+ lock_mode, &pgno)) != 0) {
+ if (ret != DB_NOTFOUND)
+ return (ret);
+ if ((ret = mpf->put(mpf,
+ cp->page, 0)) != 0)
+ return (ret);
+ cp->page = NULL;
+ if (cp->bucket == 0) {
+ cp->indx = indx =
+ NDX_INVALID;
+ goto get_space;
+ }
+ if ((ret =
+ __ham_get_meta(dbc)) != 0)
+ return (ret);
+
+ cp->bucket--;
+ cp->pgno = BUCKET_TO_PAGE(cp,
+ cp->bucket);
+ cp->indx = NDX_INVALID;
+ if ((ret = __ham_release_meta(
+ dbc)) != 0)
+ return (ret);
+ if ((ret = __ham_item_prev(dbc,
+ lock_mode, &pgno)) != 0)
+ return (ret);
+ }
+ indx = cp->indx;
+get_space:
+ /*
+ * See if we put any data in the buffer.
+ */
+ if (offp >= endp ||
+ F_ISSET(dbc, DBC_TRANSIENT)) {
+ data->size = ALIGN(size +
+ data->ulen - space,
+ sizeof(u_int32_t));
+ return (ENOMEM);
+ }
+ /*
+ * Don't continue; we're all out
+ * of space, even though we're
+ * returning success.
+ */
+ next_key = 0;
+ break;
+ }
+ memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+
+ /*
+ * We're about to crack the offset(s) and length(s)
+ * out of an H_KEYDATA or H_DUPLICATE item.
+ * There are three cases:
+ * 1. We were moved into a duplicate set by
+ * the standard hash cursor code. Respect
+ * the dup_off and dup_tlen we were given.
+ * 2. We stumbled upon a duplicate set while
+ * walking the page on our own. We need to
+ * recognize it as a dup and set dup_off and
+ * dup_tlen.
+ * 3. The current item is not a dup.
+ */
+ if (F_ISSET(cp, H_ISDUP)) {
+ /* Case 1 */
+ is_dup = 1;
+ dup_len = cp->dup_len;
+ dup_off = cp->dup_off;
+ dup_tlen = cp->dup_tlen;
+ } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) {
+ /* Case 2 */
+ is_dup = 1;
+ /*
+ * If we run out of memory and bail,
+ * make sure the fact we're in a dup set
+ * isn't ignored later.
+ */
+ F_SET(cp, H_ISDUP);
+ dup_off = 0;
+ memcpy(&dup_len,
+ HKEYDATA_DATA(hk), sizeof(db_indx_t));
+ dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx);
+ } else
+ /* Case 3 */
+ is_dup = dup_len = dup_off = dup_tlen = 0;
+
+ do {
+ space -= (is_key ? 4 : 2) * sizeof(*offp);
+ size += (is_key ? 4 : 2) * sizeof(*offp);
+ /*
+ * Since space is an unsigned, if we happen
+ * to wrap, then this comparison will turn out
+ * to be true. XXX Wouldn't it be better to
+ * simply check above that space is greater than
+ * the value we're about to subtract???
+ */
+ if (space > data->ulen) {
+ if (!is_dup || dup_off == 0)
+ goto back_up;
+ dup_off -= (db_indx_t)DUP_SIZE(offp[1]);
+ goto get_space;
+ }
+ if (is_key) {
+ *offp-- = key_off;
+ *offp-- = key_size;
+ }
+ if (is_dup) {
+ *offp-- = (int32_t)(
+ inp[indx + 1] - HOFFSET(pg) +
+ dp - dbuf + SSZA(HKEYDATA, data) +
+ dup_off + sizeof(db_indx_t));
+ memcpy(&dup_len,
+ HKEYDATA_DATA(hk) + dup_off,
+ sizeof(db_indx_t));
+ dup_off += DUP_SIZE(dup_len);
+ *offp-- = dup_len;
+ } else {
+ *offp-- = (int32_t)(
+ inp[indx + 1] - HOFFSET(pg) +
+ dp - dbuf + SSZA(HKEYDATA, data));
+ *offp-- = LEN_HDATA(dbp, pg,
+ pagesize, indx);
+ }
+ } while (is_dup && dup_off < dup_tlen && no_dup == 0);
+ F_CLR(cp, H_ISDUP);
+ break;
+ case H_OFFDUP:
+ memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
+ space -= 2 * sizeof(*offp);
+ if (space > data->ulen)
+ goto back_up;
+
+ if (is_key) {
+ space -= 2 * sizeof(*offp);
+ if (space > data->ulen)
+ goto back_up;
+ *offp-- = key_off;
+ *offp-- = key_size;
+ }
+ saveoff = offp;
+ if ((ret = __bam_bulk_duplicates(dbc,
+ pgno, dbuf, is_key ? offp + 2 : NULL,
+ &offp, &np, &space, no_dup)) != 0) {
+ if (ret == ENOMEM) {
+ size = space;
+ if (is_key && saveoff == offp) {
+ offp += 2;
+ goto back_up;
+ }
+ goto get_space;
+ }
+ return (ret);
+ }
+ break;
+ case H_OFFPAGE:
+ space -= (is_key ? 4 : 2) * sizeof(*offp);
+ if (space > data->ulen)
+ goto back_up;
+
+ memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
+ memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
+ if (size > space)
+ goto back_up;
+
+ if ((ret =
+ __bam_bulk_overflow(dbc, size, pgno, np)) != 0)
+ return (ret);
+
+ if (is_key) {
+ *offp-- = key_off;
+ *offp-- = key_size;
+ }
+
+ *offp-- = (int32_t)(np - dbuf);
+ *offp-- = size;
+
+ np += size;
+ space -= size;
+ break;
+ }
+ } while (next_key && (indx += 2) < NUM_ENT(pg));
+
+ cp->indx = indx;
+ cp->dup_len = dup_len;
+ cp->dup_off = dup_off;
+ cp->dup_tlen = dup_tlen;
+
+	/* If we are off the page then try the next page. */
+ if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
+ if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0)
+ goto next_pg;
+ if (ret != DB_NOTFOUND)
+ return (ret);
+ if ((ret = mpf->put(dbc->dbp->mpf, cp->page, 0)) != 0)
+ return (ret);
+ cp->page = NULL;
+ if ((ret = __ham_get_meta(dbc)) != 0)
+ return (ret);
+
+ cp->bucket++;
+ if (cp->bucket > cp->hdr->max_bucket) {
+ /*
+ * Restore cursor to its previous state. We're past
+ * the last item in the last bucket, so the next
+ * DBC->c_get(DB_NEXT) will return DB_NOTFOUND.
+ */
+ cp->bucket--;
+ ret = DB_NOTFOUND;
+ } else {
+ /*
+ * Start on the next bucket.
+ *
+ * Note that if this new bucket happens to be empty,
+ * but there's another non-empty bucket after it,
+ * we'll return early. This is a rare case, and we
+ * don't guarantee any particular number of keys
+ * returned on each call, so just let the next call
+ * to bulk get move forward by yet another bucket.
+ */
+ cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket);
+ cp->indx = NDX_INVALID;
+ F_CLR(cp, H_ISDUP);
+ ret = __ham_item_next(dbc, lock_mode, &pgno);
+ }
+
+ if ((t_ret = __ham_release_meta(dbc)) != 0)
+ return (t_ret);
+ if (ret == 0)
+ goto next_pg;
+ if (ret != DB_NOTFOUND)
+ return (ret);
+ }
+ *offp = (u_int32_t) -1;
+ return (0);
+}
+
static int
__ham_c_put(dbc, key, data, flags, pgnop)
DBC *dbc;
@@ -949,6 +969,7 @@ __ham_c_put(dbc, key, data, flags, pgnop)
db_pgno_t *pgnop;
{
DB *dbp;
+ DB_MPOOLFILE *mpf;
DBT tmp_val, *myval;
HASH_CURSOR *hcp;
u_int32_t nbytes;
@@ -962,6 +983,7 @@ __ham_c_put(dbc, key, data, flags, pgnop)
COMPQUIET(myval, NULL);
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_DELETED) &&
@@ -984,8 +1006,7 @@ __ham_c_put(dbc, key, data, flags, pgnop)
ret = 0;
if (hcp->seek_found_page != PGNO_INVALID &&
hcp->seek_found_page != hcp->pgno) {
- if ((ret = memp_fput(dbp->mpf, hcp->page, 0))
- != 0)
+ if ((ret = mpf->put(mpf, hcp->page, 0)) != 0)
goto err2;
hcp->page = NULL;
hcp->pgno = hcp->seek_found_page;
@@ -1000,9 +1021,10 @@ __ham_c_put(dbc, key, data, flags, pgnop)
* and then write the new bytes represented by
* val.
*/
- if ((ret = __ham_init_dbt(dbp->dbenv,
- &tmp_val, data->size + data->doff,
- &dbc->rdata.data, &dbc->rdata.ulen)) == 0) {
+ if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val,
+ data->size + data->doff,
+ &dbc->my_rdata.data,
+ &dbc->my_rdata.ulen)) == 0) {
memset(tmp_val.data, 0, data->doff);
memcpy((u_int8_t *)tmp_val.data +
data->doff, data->data, data->size);
@@ -1038,8 +1060,8 @@ done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
F_CLR(hcp, H_EXPAND);
}
- if (ret == 0 &&
- (t_ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0)
+ if (hcp->page != NULL &&
+ (t_ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
@@ -1058,17 +1080,30 @@ __ham_expand_table(dbc)
DBC *dbc;
{
DB *dbp;
- PAGE *h;
+ DB_LOCK metalock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ DBMETA *mmeta;
HASH_CURSOR *hcp;
- db_pgno_t pgno;
- u_int32_t old_bucket, new_bucket;
- int ret;
+ PAGE *h;
+ db_pgno_t pgno, mpgno;
+ u_int32_t newalloc, new_bucket, old_bucket;
+ int dirty_meta, got_meta, logn, new_double, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
if ((ret = __ham_dirty_meta(dbc)) != 0)
return (ret);
+ LOCK_INIT(metalock);
+ mmeta = (DBMETA *) hcp->hdr;
+ mpgno = mmeta->pgno;
+ h = NULL;
+ dirty_meta = 0;
+ got_meta = 0;
+ newalloc = 0;
+
/*
* If the split point is about to increase, make sure that we
* have enough extra pages. The calculation here is weird.
@@ -1078,86 +1113,116 @@ __ham_expand_table(dbc)
* see what the log of one greater than that is; here we have to
* look at the log of max + 2. VERY NASTY STUFF.
*
- * It just got even nastier. With subdatabases, we have to request
- * a chunk of contiguous pages, so we do that here using an
- * undocumented feature of mpool (the MPOOL_NEW_GROUP flag) to
- * give us a number of contiguous pages. Ouch.
+ * We figure out what we need to do, then we log it, then request
+ * the pages from mpool. We don't want to fail after extending
+ * the file.
+ *
+ * If the page we are about to split into has already been allocated,
+ * then we simply need to get it to get its LSN. If it hasn't yet
+ * been allocated, then we know it's LSN (0,0).
*/
- if (hcp->hdr->max_bucket == hcp->hdr->high_mask) {
- /*
- * Ask mpool to give us a set of contiguous page numbers
- * large enough to contain the next doubling.
- *
- * Figure out how many new pages we need. This will return
- * us the last page. We calculate its page number, initialize
- * the page and then write it back to reserve all the pages
- * in between. It is possible that the allocation of new pages
- * has already been done, but the tranaction aborted. Since
- * we don't undo the allocation, check for a valid pgno before
- * doing the allocation.
- */
- pgno = hcp->hdr->max_bucket + 1;
- if (hcp->hdr->spares[__db_log2(pgno) + 1] == PGNO_INVALID)
- /* Allocate a group of pages. */
- ret = memp_fget(dbp->mpf,
- &pgno, DB_MPOOL_NEW_GROUP, &h);
- else {
- /* Just read in the last page of the batch */
- pgno = hcp->hdr->spares[__db_log2(pgno) + 1] +
- hcp->hdr->max_bucket + 1;
- /* Move to the last page of the group. */
- pgno += hcp->hdr->max_bucket;
- ret = memp_fget(dbp->mpf,
- &pgno, DB_MPOOL_CREATE, &h);
- }
- if (ret != 0)
- return (ret);
- P_INIT(h, dbp->pgsize, pgno,
- PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
- pgno -= hcp->hdr->max_bucket;
- } else {
- pgno = BUCKET_TO_PAGE(hcp, hcp->hdr->max_bucket + 1);
+ new_bucket = hcp->hdr->max_bucket + 1;
+ old_bucket = new_bucket & hcp->hdr->low_mask;
+
+ new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask;
+ logn = __db_log2(new_bucket);
+
+ if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) {
+ /* Page exists; get it so we can get its LSN */
+ pgno = BUCKET_TO_PAGE(hcp, new_bucket);
if ((ret =
- memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
- return (ret);
+ mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
+ goto err;
+ lsn = h->lsn;
+ } else {
+ /* Get the master meta-data page to do allocation. */
+ if (F_ISSET(dbp, DB_AM_SUBDB)) {
+ mpgno = PGNO_BASE_MD;
+ if ((ret = __db_lget(dbc,
+ 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ goto err;
+ if ((ret =
+ mpf->get(mpf, &mpgno, 0, (PAGE **)&mmeta)) != 0)
+ goto err;
+ got_meta = 1;
+ }
+ pgno = mmeta->last_pgno + 1;
+ ZERO_LSN(lsn);
+ newalloc = 1;
}
- /* Now we can log the meta-data split. */
- if (DB_LOGGING(dbc)) {
- if ((ret = __ham_metagroup_log(dbp->dbenv,
- dbc->txn, &h->lsn, 0, dbp->log_fileid,
- hcp->hdr->max_bucket, pgno, &hcp->hdr->dbmeta.lsn,
- &h->lsn)) != 0) {
- (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
- return (ret);
- }
+ /* Log the meta-data split first. */
+ if (DBC_LOGGING(dbc)) {
+ /*
+ * We always log the page number of the first page of
+ * the allocation group. However, the LSN that we log
+ * is either the LSN on the first page (if we did not
+ * do the actual allocation here) or the LSN on the last
+ * page of the unit (if we did do the allocation here).
+ */
+ if ((ret = __ham_metagroup_log(dbp, dbc->txn,
+ &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn,
+ hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn,
+ pgno, &lsn, newalloc)) != 0)
+ goto err;
+ } else
+ LSN_NOT_LOGGED(lsn);
- hcp->hdr->dbmeta.lsn = h->lsn;
- }
+ hcp->hdr->dbmeta.lsn = lsn;
- /* If we allocated some new pages, write out the last page. */
- if ((ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
- return (ret);
+ if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) {
+ /*
+ * We need to begin a new doubling and we have not allocated
+ * any pages yet. Read the last page in and initialize it to
+ * make the allocation contiguous. The pgno we calculated
+ * above is the first page allocated. The entry in spares is
+ * that page number minus any buckets already allocated (it
+		 * simplifies bucket to page translation). After we've set
+ * that, we calculate the last pgno.
+ */
+
+ hcp->hdr->spares[logn + 1] = pgno - new_bucket;
+ pgno += hcp->hdr->max_bucket;
+ mmeta->last_pgno = pgno;
+ mmeta->lsn = lsn;
+ dirty_meta = DB_MPOOL_DIRTY;
- new_bucket = ++hcp->hdr->max_bucket;
- old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask);
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
+ goto err;
+
+ P_INIT(h, dbp->pgsize,
+ pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ }
+
+ /* Write out whatever page we ended up modifying. */
+ h->lsn = lsn;
+ if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ h = NULL;
/*
- * If we started a new doubling, fill in the spares array with
- * the starting page number negatively offset by the bucket number.
+ * Update the meta-data page of this hash database.
*/
- if (new_bucket > hcp->hdr->high_mask) {
- /* Starting a new doubling */
+ hcp->hdr->max_bucket = new_bucket;
+ if (new_double) {
hcp->hdr->low_mask = hcp->hdr->high_mask;
hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
- if (hcp->hdr->spares[__db_log2(new_bucket) + 1] == PGNO_INVALID)
- hcp->hdr->spares[__db_log2(new_bucket) + 1] =
- pgno - new_bucket;
}
/* Relocate records to the new bucket */
- return (__ham_split_page(dbc, old_bucket, new_bucket));
+ ret = __ham_split_page(dbc, old_bucket, new_bucket);
+
+err: if (got_meta)
+ (void)mpf->put(mpf, mmeta, dirty_meta);
+
+ if (LOCK_ISSET(metalock))
+ (void)__TLPUT(dbc, metalock);
+
+ if (h != NULL)
+ (void)mpf->put(mpf, h, 0);
+
+ return (ret);
}
/*
@@ -1191,7 +1256,7 @@ __ham_call_hash(dbc, k, len)
* everything held by the cursor.
*/
static int
-__ham_dup_return (dbc, val, flags)
+__ham_dup_return(dbc, val, flags)
DBC *dbc;
DBT *val;
u_int32_t flags;
@@ -1211,7 +1276,7 @@ __ham_dup_return (dbc, val, flags)
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
ndx = H_DATAINDEX(hcp->indx);
- type = HPAGE_TYPE(hcp->page, ndx);
+ type = HPAGE_TYPE(dbp, hcp->page, ndx);
pp = hcp->page;
myval = val;
@@ -1228,8 +1293,8 @@ __ham_dup_return (dbc, val, flags)
DB_ASSERT(type != H_OFFDUP);
/* Case 1 */
- if (type != H_DUPLICATE &&
- flags != DB_GET_BOTH && flags != DB_GET_BOTHC)
+ if (type != H_DUPLICATE && flags != DB_GET_BOTH &&
+ flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE)
return (0);
/*
@@ -1239,11 +1304,11 @@ __ham_dup_return (dbc, val, flags)
*/
if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) {
F_SET(hcp, H_ISDUP);
- hcp->dup_tlen = LEN_HDATA(hcp->page,
+ hcp->dup_tlen = LEN_HDATA(dbp, hcp->page,
hcp->hdr->dbmeta.pagesize, hcp->indx);
- hk = H_PAIRDATA(hcp->page, hcp->indx);
- if (flags == DB_LAST
- || flags == DB_PREV || flags == DB_PREV_NODUP) {
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
+ if (flags == DB_LAST ||
+ flags == DB_PREV || flags == DB_PREV_NODUP) {
hcp->dup_off = 0;
do {
memcpy(&len,
@@ -1265,7 +1330,8 @@ __ham_dup_return (dbc, val, flags)
* may need to adjust the cursor before returning data.
* Case 4
*/
- if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) {
+ if (flags == DB_GET_BOTH ||
+ flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
if (F_ISSET(hcp, H_ISDUP)) {
/*
* If we're doing a join, search forward from the
@@ -1274,7 +1340,7 @@ __ham_dup_return (dbc, val, flags)
if (flags == DB_GET_BOTHC)
F_SET(hcp, H_CONTINUE);
- __ham_dsearch(dbc, val, &off, &cmp);
+ __ham_dsearch(dbc, val, &off, &cmp, flags);
/*
* This flag is set nowhere else and is safe to
@@ -1283,7 +1349,7 @@ __ham_dup_return (dbc, val, flags)
F_CLR(hcp, H_CONTINUE);
hcp->dup_off = off;
} else {
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
memcpy(&tlen,
HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
@@ -1298,7 +1364,7 @@ __ham_dup_return (dbc, val, flags)
* routines may only look at data and size.
*/
tmp_val.data = HKEYDATA_DATA(hk);
- tmp_val.size = LEN_HDATA(hcp->page,
+ tmp_val.size = LEN_HDATA(dbp, hcp->page,
dbp->pgsize, hcp->indx);
cmp = dbp->dup_compare == NULL ?
__bam_defcmp(dbp, &tmp_val, val) :
@@ -1311,6 +1377,18 @@ __ham_dup_return (dbc, val, flags)
}
/*
+ * If we're doing a bulk get, we don't want to actually return
+ * the data: __ham_bulk will take care of cracking out the
+ * duplicates appropriately.
+ *
+ * The rest of this function calculates partial offsets and
+ * handles the actual __db_ret, so just return if
+ * DB_MULTIPLE(_KEY) is set.
+ */
+ if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY))
+ return (0);
+
+ /*
* Now, everything is initialized, grab a duplicate if
* necessary.
*/
@@ -1351,8 +1429,8 @@ __ham_dup_return (dbc, val, flags)
* Finally, if we had a duplicate, pp, ndx, and myval should be
* set appropriately.
*/
- if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data,
- &dbc->rdata.ulen)) != 0)
+ if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata->data,
+ &dbc->rdata->ulen)) != 0)
return (ret);
/*
@@ -1374,6 +1452,7 @@ __ham_overwrite(dbc, nval, flags)
u_int32_t flags;
{
DB *dbp;
+ DB_ENV *dbenv;
HASH_CURSOR *hcp;
DBT *myval, tmp_val, tmp_val2;
void *newrec;
@@ -1383,6 +1462,7 @@ __ham_overwrite(dbc, nval, flags)
int ret;
dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_ISDUP)) {
/*
@@ -1399,7 +1479,7 @@ __ham_overwrite(dbc, nval, flags)
*/
memset(&tmp_val, 0, sizeof(tmp_val));
if ((ret =
- __ham_dup_return (dbc, &tmp_val, DB_CURRENT)) != 0)
+ __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0)
return (ret);
/* Figure out new size. */
@@ -1435,7 +1515,7 @@ __ham_overwrite(dbc, nval, flags)
}
if ((ret = __os_malloc(dbp->dbenv,
- DUP_SIZE(newsize), NULL, &newrec)) != 0)
+ DUP_SIZE(newsize), &newrec)) != 0)
return (ret);
memset(&tmp_val2, 0, sizeof(tmp_val2));
F_SET(&tmp_val2, DB_DBT_PARTIAL);
@@ -1483,8 +1563,7 @@ __ham_overwrite(dbc, nval, flags)
tmp_val2.size = newsize;
if (dbp->dup_compare(
dbp, &tmp_val, &tmp_val2) != 0) {
- (void)__os_free(newrec,
- DUP_SIZE(newsize));
+ (void)__os_free(dbenv, newrec);
return (__db_duperr(dbp, flags));
}
}
@@ -1495,7 +1574,7 @@ __ham_overwrite(dbc, nval, flags)
tmp_val2.dlen = DUP_SIZE(hcp->dup_len);
ret = __ham_replpair(dbc, &tmp_val2, 0);
- (void)__os_free(newrec, DUP_SIZE(newsize));
+ (void)__os_free(dbenv, newrec);
/* Update cursor */
if (ret != 0)
@@ -1520,7 +1599,7 @@ __ham_overwrite(dbc, nval, flags)
/* Make sure we maintain sort order. */
if (dbp->dup_compare != NULL) {
tmp_val2.data =
- HKEYDATA_DATA(H_PAIRDATA(hcp->page,
+ HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
hcp->indx)) + hcp->dup_off +
sizeof(db_indx_t);
tmp_val2.size = hcp->dup_len;
@@ -1529,8 +1608,8 @@ __ham_overwrite(dbc, nval, flags)
}
/* Overwriting a complete duplicate. */
if ((ret =
- __ham_make_dup(dbp->dbenv, nval,
- &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
+ __ham_make_dup(dbp->dbenv, nval, &tmp_val,
+ &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0)
return (ret);
/* Now fix what we are replacing. */
tmp_val.doff = hcp->dup_off;
@@ -1541,7 +1620,7 @@ __ham_overwrite(dbc, nval, flags)
hcp->dup_tlen += (nval->size - hcp->dup_len);
else
hcp->dup_tlen -= (hcp->dup_len - nval->size);
- hcp->dup_len = DUP_SIZE(nval->size);
+ hcp->dup_len = (db_indx_t)DUP_SIZE(nval->size);
}
myval = &tmp_val;
} else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
@@ -1549,12 +1628,12 @@ __ham_overwrite(dbc, nval, flags)
memcpy(&tmp_val, nval, sizeof(*nval));
F_SET(&tmp_val, DB_DBT_PARTIAL);
tmp_val.doff = 0;
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
if (HPAGE_PTYPE(hk) == H_OFFPAGE)
memcpy(&tmp_val.dlen,
HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
else
- tmp_val.dlen = LEN_HDATA(hcp->page,
+ tmp_val.dlen = LEN_HDATA(dbp, hcp->page,
hcp->hdr->dbmeta.pagesize, hcp->indx);
myval = &tmp_val;
} else
@@ -1601,7 +1680,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop)
hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size);
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
- while (1) {
+ for (;;) {
*pgnop = PGNO_INVALID;
if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0)
return (ret);
@@ -1609,7 +1688,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop)
if (F_ISSET(hcp, H_NOMORE))
break;
- hk = H_PAIRKEY(hcp->page, hcp->indx);
+ hk = H_PAIRKEY(dbp, hcp->page, hcp->indx);
switch (HPAGE_PTYPE(hk)) {
case H_OFFPAGE:
memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
@@ -1625,12 +1704,12 @@ __ham_lookup(dbc, key, sought, mode, pgnop)
break;
case H_KEYDATA:
if (key->size ==
- LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx) &&
+ LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx) &&
memcmp(key->data,
HKEYDATA_DATA(hk), key->size) == 0) {
/* Found the key, check for data type. */
found_key: F_SET(hcp, H_OK);
- dk = H_PAIRDATA(hcp->page, hcp->indx);
+ dk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
if (HPAGE_PTYPE(dk) == H_OFFDUP)
memcpy(pgnop, HOFFDUP_PGNO(dk),
sizeof(db_pgno_t));
@@ -1643,7 +1722,7 @@ found_key: F_SET(hcp, H_OK);
* These are errors because keys are never
* duplicated, only data items are.
*/
- return (__db_pgfmt(dbp, PGNO(hcp->page)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page)));
}
}
@@ -1677,7 +1756,7 @@ __ham_init_dbt(dbenv, dbt, size, bufp, sizep)
memset(dbt, 0, sizeof(*dbt));
if (*sizep < size) {
- if ((ret = __os_realloc(dbenv, size, NULL, bufp)) != 0) {
+ if ((ret = __os_realloc(dbenv, size, bufp)) != 0) {
*sizep = 0;
return (ret);
}
@@ -1732,8 +1811,8 @@ __ham_c_update(dbc, len, add, is_dup)
MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
/*
- * Calcuate the order of this deleted record.
- * This will be one grater than any cursor that is pointing
+ * Calculate the order of this deleted record.
+ * This will be one greater than any cursor that is pointing
* at this record and already marked as deleted.
*/
order = 0;
@@ -1749,11 +1828,11 @@ __ham_c_update(dbc, len, add, is_dup)
continue;
lcp = (HASH_CURSOR *)cp->internal;
if (F_ISSET(lcp, H_DELETED) &&
- hcp->pgno == lcp->pgno &&
- hcp->indx == lcp->indx &&
- order <= lcp->order &&
- (!is_dup || hcp->dup_off == lcp->dup_off))
- order = lcp->order +1;
+ hcp->pgno == lcp->pgno &&
+ hcp->indx == lcp->indx &&
+ order <= lcp->order &&
+ (!is_dup || hcp->dup_off == lcp->dup_off))
+ order = lcp->order + 1;
}
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
}
@@ -1788,8 +1867,8 @@ __ham_c_update(dbc, len, add, is_dup)
* We are "undeleting" so unmark all
* cursors with the same order.
*/
- if (lcp->indx == hcp->indx
- && F_ISSET(lcp, H_DELETED)) {
+ if (lcp->indx == hcp->indx &&
+ F_ISSET(lcp, H_DELETED)) {
if (lcp->order == hcp->order)
F_CLR(lcp, H_DELETED);
else if (lcp->order >
@@ -1815,12 +1894,13 @@ __ham_c_update(dbc, len, add, is_dup)
} else {
if (lcp->indx > hcp->indx) {
lcp->indx -= 2;
- if (lcp->indx == hcp->indx
- && F_ISSET(lcp, H_DELETED))
+ if (lcp->indx == hcp->indx &&
+ F_ISSET(lcp, H_DELETED))
lcp->order += order;
- } else if (lcp->indx == hcp->indx
- && !F_ISSET(lcp, H_DELETED)) {
+ } else if (lcp->indx == hcp->indx &&
+ !F_ISSET(lcp, H_DELETED)) {
F_SET(lcp, H_DELETED);
+ F_CLR(lcp, H_ISDUP);
lcp->order = order;
}
}
@@ -1833,10 +1913,10 @@ __ham_c_update(dbc, len, add, is_dup)
*/
if (add) {
lcp->dup_tlen += len;
- if (lcp->dup_off == hcp->dup_off
- && F_ISSET(hcp, H_DELETED)
- && F_ISSET(lcp, H_DELETED)) {
- /* Abort of a delete. */
+ if (lcp->dup_off == hcp->dup_off &&
+ F_ISSET(hcp, H_DELETED) &&
+ F_ISSET(lcp, H_DELETED)) {
+ /* Abort of a delete. */
if (lcp->order == hcp->order)
F_CLR(lcp, H_DELETED);
else if (lcp->order >
@@ -1851,8 +1931,9 @@ __ham_c_update(dbc, len, add, is_dup)
lcp->dup_tlen -= len;
if (lcp->dup_off > hcp->dup_off) {
lcp->dup_off -= len;
- if (lcp->dup_off == hcp->dup_off
- && F_ISSET(lcp, H_DELETED))
+ if (lcp->dup_off ==
+ hcp->dup_off &&
+ F_ISSET(lcp, H_DELETED))
lcp->order += order;
} else if (lcp->dup_off ==
hcp->dup_off &&
@@ -1867,10 +1948,9 @@ __ham_c_update(dbc, len, add, is_dup)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(dbc)) {
- if ((ret = __ham_curadj_log(dbenv,
- my_txn, &lsn, 0, dbp->log_fileid, hcp->pgno,
- hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0)
+ if (found != 0 && DBC_LOGGING(dbc)) {
+ if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno,
+ hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0)
return (ret);
}
@@ -1885,13 +1965,12 @@ __ham_c_update(dbc, len, add, is_dup)
* cursors on a split. The latter is so we can update cursors when we
* move items off page.
*
- * PUBLIC: int __ham_get_clist __P((DB *,
- * PUBLIC: db_pgno_t, u_int32_t, DBC ***));
+ * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***));
*/
int
-__ham_get_clist(dbp, bucket, indx, listp)
+__ham_get_clist(dbp, pgno, indx, listp)
DB *dbp;
- db_pgno_t bucket;
+ db_pgno_t pgno;
u_int32_t indx;
DBC ***listp;
{
@@ -1915,18 +1994,20 @@ __ham_get_clist(dbp, bucket, indx, listp)
MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
cp = TAILQ_NEXT(cp, links))
- if (cp->dbtype == DB_HASH &&
- ((indx == NDX_INVALID &&
- ((HASH_CURSOR *)(cp->internal))->bucket
- == bucket) || (indx != NDX_INVALID &&
- cp->internal->pgno == bucket &&
- cp->internal->indx == indx))) {
+ /*
+ * We match if cp->pgno matches the specified
+ * pgno, and if either the cp->indx matches
+ * or we weren't given an index.
+ */
+ if (cp->internal->pgno == pgno &&
+ (indx == NDX_INVALID ||
+ cp->internal->indx == indx)) {
if (nused >= nalloc) {
nalloc += 10;
if ((ret = __os_realloc(dbp->dbenv,
nalloc * sizeof(HASH_CURSOR *),
- NULL, listp)) != 0)
- return (ret);
+ listp)) != 0)
+ goto err;
}
(*listp)[nused++] = cp;
}
@@ -1939,74 +2020,25 @@ __ham_get_clist(dbp, bucket, indx, listp)
if (nused >= nalloc) {
nalloc++;
if ((ret = __os_realloc(dbp->dbenv,
- nalloc * sizeof(HASH_CURSOR *), NULL, listp)) != 0)
+ nalloc * sizeof(HASH_CURSOR *), listp)) != 0)
return (ret);
}
(*listp)[nused] = NULL;
}
return (0);
-}
-
-static int
-__ham_del_dups(orig_dbc, key)
- DBC *orig_dbc;
- DBT *key;
-{
- DBC *dbc;
- DBT data, lkey;
- int ret, t_ret;
-
- /* Allocate a cursor. */
- if ((ret = orig_dbc->c_dup(orig_dbc, &dbc, 0)) != 0)
- return (ret);
-
- /*
- * Walk a cursor through the key/data pairs, deleting as we go. Set
- * the DB_DBT_USERMEM flag, as this might be a threaded application
- * and the flags checking will catch us. We don't actually want the
- * keys or data, so request a partial of length 0.
- */
- memset(&lkey, 0, sizeof(lkey));
- F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL);
- memset(&data, 0, sizeof(data));
- F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
- /* Walk through the set of key/data pairs, deleting as we go. */
- if ((ret = dbc->c_get(dbc, key, &data, DB_SET)) != 0) {
- if (ret == DB_NOTFOUND)
- ret = 0;
- goto err;
- }
-
- for (;;) {
- if ((ret = dbc->c_del(dbc, 0)) != 0)
- goto err;
- if ((ret = dbc->c_get(dbc, &lkey, &data, DB_NEXT_DUP)) != 0) {
- if (ret == DB_NOTFOUND) {
- ret = 0;
- break;
- }
- goto err;
- }
- }
-
-err: /*
- * Discard the cursor. This will cause the underlying off-page dup
- * tree to go away as well as the actual entry on the page.
- */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
+err:
+ MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
return (ret);
-
}
static int
__ham_c_writelock(dbc)
DBC *dbc;
{
- HASH_CURSOR *hcp;
+ DB_ENV *dbenv;
DB_LOCK tmp_lock;
+ HASH_CURSOR *hcp;
int ret;
/*
@@ -2017,79 +2049,13 @@ __ham_c_writelock(dbc)
return (0);
hcp = (HASH_CURSOR *)dbc->internal;
- if ((hcp->lock.off == LOCK_INVALID || hcp->lock_mode == DB_LOCK_READ)) {
+ if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode == DB_LOCK_READ)) {
tmp_lock = hcp->lock;
if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0)
return (ret);
- if (tmp_lock.off != LOCK_INVALID &&
- (ret = lock_put(dbc->dbp->dbenv, &tmp_lock)) != 0)
- return (ret);
- }
- return (0);
-}
-
-/*
- * __ham_c_chgpg --
- *
- * Adjust the cursors after moving an item from one page to another.
- * If the old_index is NDX_INVALID, that means that we copied the
- * page wholesale and we're leaving indices intact and just changing
- * the page number.
- *
- * PUBLIC: int __ham_c_chgpg
- * PUBLIC: __P((DBC *, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
- */
-int
-__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index)
- DBC *dbc;
- db_pgno_t old_pgno, new_pgno;
- u_int32_t old_index, new_index;
-{
- DB *dbp, *ldbp;
- DB_ENV *dbenv;
- DB_LSN lsn;
- DB_TXN *my_txn;
- DBC *cp;
- HASH_CURSOR *hcp;
- int found, ret;
-
- dbp = dbc->dbp;
- dbenv = dbp->dbenv;
-
- my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
- found = 0;
-
- MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
- for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
- ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
- ldbp = LIST_NEXT(ldbp, dblistlinks)) {
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
- cp = TAILQ_NEXT(cp, links)) {
- if (cp == dbc || cp->dbtype != DB_HASH)
- continue;
-
- hcp = (HASH_CURSOR *)cp->internal;
- if (hcp->pgno == old_pgno) {
- if (old_index == NDX_INVALID) {
- hcp->pgno = new_pgno;
- } else if (hcp->indx == old_index) {
- hcp->pgno = new_pgno;
- hcp->indx = new_index;
- } else
- continue;
- if (my_txn != NULL && cp->txn != my_txn)
- found = 1;
- }
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
-
- if (found != 0 && DB_LOGGING(dbc)) {
- if ((ret = __ham_chgpg_log(dbenv,
- my_txn, &lsn, 0, dbp->log_fileid, DB_HAM_CHGPG,
- old_pgno, new_pgno, old_index, new_index)) != 0)
+ dbenv = dbc->dbp->dbenv;
+ if (LOCK_ISSET(tmp_lock) &&
+ (ret = dbenv->lock_put(dbenv, &tmp_lock)) != 0)
return (ret);
}
return (0);
diff --git a/bdb/hash/hash.src b/bdb/hash/hash.src
index e6ecd11c907..b4b633c56e6 100644
--- a/bdb/hash/hash.src
+++ b/bdb/hash/hash.src
@@ -1,8 +1,10 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
+ *
+ * $Id: hash.src,v 10.38 2002/04/17 19:03:10 krinsky Exp $
*/
/*
* Copyright (c) 1995, 1996
@@ -38,44 +40,10 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $Id: hash.src,v 10.24 2000/12/12 17:41:48 bostic Exp $
- */
-
-/*
- * This is the source file used to create the logging functions for the
- * hash package. Each access method (or set of routines wishing to register
- * record types with the transaction system) should have a file like this.
- * Each type of log record and its parameters is defined. The basic
- * format of a record definition is:
- *
- * BEGIN <RECORD_TYPE>
- * ARG|STRING|POINTER <variable name> <variable type> <printf format>
- * ...
- * END
- * ARG the argument is a simple parameter of the type * specified.
- * DBT the argument is a DBT (db.h) containing a length and pointer.
- * PTR the argument is a pointer to the data type specified; the entire
- * type should be logged.
- *
- * There are a set of shell scripts of the form xxx.sh that generate c
- * code and or h files to process these. (This is probably better done
- * in a single PERL script, but for now, this works.)
- *
- * The DB recovery system requires the following three fields appear in
- * every record, and will assign them to the per-record-type structures
- * as well as making them the first parameters to the appropriate logging
- * call.
- * rectype: record-type, identifies the structure and log/read call
- * txnid: transaction id, a DBT in this implementation
- * prev: the last LSN for this transaction
*/
-/*
- * Use the argument of PREFIX as the prefix for all record types,
- * routines, id numbers, etc.
- */
-PREFIX ham
+PREFIX __ham
+DBPRIVATE
INCLUDE #include "db_config.h"
INCLUDE
@@ -83,16 +51,18 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES
INCLUDE #include <sys/types.h>
INCLUDE
INCLUDE #include <ctype.h>
-INCLUDE #include <errno.h>
INCLUDE #include <string.h>
INCLUDE #endif
INCLUDE
INCLUDE #include "db_int.h"
-INCLUDE #include "db_page.h"
-INCLUDE #include "db_dispatch.h"
-INCLUDE #include "db_am.h"
-INCLUDE #include "hash.h"
-INCLUDE #include "txn.h"
+INCLUDE #include "dbinc/crypto.h"
+INCLUDE #include "dbinc/db_page.h"
+INCLUDE #include "dbinc/db_dispatch.h"
+INCLUDE #include "dbinc/db_am.h"
+INCLUDE #include "dbinc/hash.h"
+INCLUDE #include "dbinc/rep.h"
+INCLUDE #include "dbinc/log.h"
+INCLUDE #include "dbinc/txn.h"
INCLUDE
/*
@@ -109,8 +79,8 @@ INCLUDE
*/
BEGIN insdel 21
ARG opcode u_int32_t lu
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
ARG ndx u_int32_t lu
POINTER pagelsn DB_LSN * lu
DBT key DBT s
@@ -129,46 +99,26 @@ END
*/
BEGIN newpage 22
ARG opcode u_int32_t lu
-ARG fileid int32_t ld
-ARG prev_pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCKNZ prev_pgno db_pgno_t lu
POINTER prevlsn DB_LSN * lu
-ARG new_pgno db_pgno_t lu
+WRLOCKNZ new_pgno db_pgno_t lu
POINTER pagelsn DB_LSN * lu
-ARG next_pgno db_pgno_t lu
+WRLOCKNZ next_pgno db_pgno_t lu
POINTER nextlsn DB_LSN * lu
END
/*
- * DEPRECATED in 3.0.
- * Superceded by metagroup which allocates a group of new pages.
- *
- * Splitting requires two types of log messages. The first logs the
- * meta-data of the split.
- *
- * For the meta-data split
- * bucket: max_bucket in table before split
- * ovflpoint: overflow point before split.
- * spares: spares[ovflpoint] before split.
- */
-DEPRECATED splitmeta 23
-ARG fileid int32_t ld
-ARG bucket u_int32_t lu
-ARG ovflpoint u_int32_t lu
-ARG spares u_int32_t lu
-POINTER metalsn DB_LSN * lu
-END
-
-/*
* Splitting requires two types of log messages. The second logs the
* data on the original page. To redo the split, we have to visit the
* new page (pages) and add the items back on the page if they are not
* yet there.
*/
BEGIN splitdata 24
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG opcode u_int32_t lu
-ARG pgno db_pgno_t lu
-DBT pageimage DBT s
+WRLOCK pgno db_pgno_t lu
+PGDBT pageimage DBT s
POINTER pagelsn DB_LSN * lu
END
@@ -185,8 +135,8 @@ END
* makedup - this was a replacement that made an item a duplicate.
*/
BEGIN replace 25
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
ARG ndx u_int32_t lu
POINTER pagelsn DB_LSN * lu
ARG off int32_t ld
@@ -196,52 +146,6 @@ ARG makedup u_int32_t lu
END
/*
- * DEPRECATED in 3.0.
- * Hash now uses the btree allocation and deletion page routines.
- *
- * HASH-newpgno: is used to record getting/deleting a new page number.
- * This doesn't require much data modification, just modifying the
- * meta-data.
- * pgno is the page being allocated/freed.
- * free_pgno is the next_pgno on the free list.
- * old_type was the type of a page being deallocated.
- * old_pgno was the next page number before the deallocation.
- */
-DEPRECATED newpgno 26
-ARG opcode u_int32_t lu
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-ARG free_pgno db_pgno_t lu
-ARG old_type u_int32_t lu
-ARG old_pgno db_pgno_t lu
-ARG new_type u_int32_t lu
-POINTER pagelsn DB_LSN * lu
-POINTER metalsn DB_LSN * lu
-END
-
-/*
- * DEPRECATED in 3.0.
- * Since we now pre-allocate the contiguous chunk of pages for a doubling,
- * there is no big benefit to pre-allocating a few extra pages. It used
- * to be that the file was only physically as large as the current bucket,
- * so if you were on a doubling of 16K, but were only on the first bucket
- * of that 16K, the file was much shorter than it would be at the end of
- * the doubling, so we didn't want to force overflow pages at the end of the
- * 16K pages. Since we now must allocate the 16K pages (because of sub
- * databases), it's not a big deal to tack extra pages on at the end.
- *
- * ovfl: initialize a set of overflow pages.
- */
-DEPRECATED ovfl 27
-ARG fileid int32_t ld
-ARG start_pgno db_pgno_t lu
-ARG npages u_int32_t lu
-ARG free_pgno db_pgno_t lu
-ARG ovflpoint u_int32_t lu
-POINTER metalsn DB_LSN * lu
-END
-
-/*
* Used when we empty the first page in a bucket and there are pages after
* it. The page after it gets copied into the bucket page (since bucket
* pages have to be in fixed locations).
@@ -252,33 +156,46 @@ END
* nnextlsn: the LSN of nnext_pgno.
*/
BEGIN copypage 28
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
POINTER pagelsn DB_LSN * lu
-ARG next_pgno db_pgno_t lu
+WRLOCK next_pgno db_pgno_t lu
POINTER nextlsn DB_LSN * lu
-ARG nnext_pgno db_pgno_t lu
+WRLOCKNZ nnext_pgno db_pgno_t lu
POINTER nnextlsn DB_LSN * lu
-DBT page DBT s
+PGDBT page DBT s
END
/*
- * This replaces the old splitmeta operation. It behaves largely the same
- * way, but it has enough information so that we can record a group allocation
- * which we do now because of sub databases. The number of pages allocated is
- * always bucket + 1 pgno is the page number of the first newly allocated
- * bucket.
+ * This record logs the meta-data aspects of a split operation. It has enough
+ * information so that we can record both an individual page allocation as well
+ * as a group allocation which we do because in sub databases, the pages in
+ * a hash doubling, must be contiguous. If we do a group allocation, the
+ * number of pages allocated is bucket + 1, pgno is the page number of the
+ * first newly allocated bucket.
+ *
* bucket: Old maximum bucket number.
- * pgno: Page allocated to bucket + 1 (first newly allocated page)
+ * mmpgno: Master meta-data page number (0 if same as mpgno).
+ * mmetalsn: Lsn of the master meta-data page.
+ * mpgno: Meta-data page number.
* metalsn: Lsn of the meta-data page.
- * pagelsn: Lsn of the maximum page allocated.
+ * pgno: Page allocated to bucket + 1 (first newly allocated page)
+ * pagelsn: Lsn of either the first page allocated (if newalloc == 0) or
+ * the last page allocated (if newalloc == 1).
+ * newalloc: 1 indicates that this record did the actual allocation;
+ * 0 indicates that the pages were already allocated from a
+ * previous (failed) allocation.
*/
BEGIN metagroup 29
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG bucket u_int32_t lu
-ARG pgno db_pgno_t lu
+WRLOCK mmpgno db_pgno_t lu
+POINTER mmetalsn DB_LSN * lu
+WRLOCKNZ mpgno db_pgno_t lu
POINTER metalsn DB_LSN * lu
+WRLOCK pgno db_pgno_t lu
POINTER pagelsn DB_LSN * lu
+ARG newalloc u_int32_t lu
END
/*
@@ -293,28 +210,10 @@ END
* start_pgno: starting page number
* num: number of allocated pages
*/
-DEPRECATED groupalloc1 30
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER metalsn DB_LSN * lu
-POINTER mmetalsn DB_LSN * lu
-ARG start_pgno db_pgno_t lu
-ARG num u_int32_t lu
-END
-
-DEPRECATED groupalloc2 31
-ARG fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-POINTER alloc_lsn DB_LSN * lu
-ARG start_pgno db_pgno_t lu
-ARG num u_int32_t lu
-ARG free db_pgno_t lu
-END
-
BEGIN groupalloc 32
-ARG fileid int32_t ld
+DB fileid int32_t ld
POINTER meta_lsn DB_LSN * lu
-ARG start_pgno db_pgno_t lu
+WRLOCK start_pgno db_pgno_t lu
ARG num u_int32_t lu
ARG free db_pgno_t lu
END
@@ -329,7 +228,7 @@ END
* dup_off - if a dup its offset
* add - 1 if add 0 if delete
* is_dup - 1 if dup 0 otherwise.
- * order - order assinged to this deleted record or dup.
+ * order - order assigned to this deleted record or dup.
*
* chgpg - rmoved a page, move the records to a new page
* mode - CHGPG page was deleted or records move to new page.
@@ -338,9 +237,15 @@ END
* old_pgno, new_pgno - old and new page numbers.
* old_index, new_index - old and new index numbers, NDX_INVALID if
* it effects all records on the page.
+ * For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG,
+ * and DELLASTPG), we overload old_indx and new_indx to avoid
+ * needing a new log record type: old_indx stores the only
+ * indx of interest to these records, and new_indx stores the
+ * order that's assigned to the lowest deleted record we're
+ * moving.
*/
BEGIN curadj 33
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
ARG indx u_int32_t lu
ARG len u_int32_t lu
@@ -351,7 +256,7 @@ ARG order u_int32_t lu
END
BEGIN chgpg 34
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG mode db_ham_mode ld
ARG old_pgno db_pgno_t lu
ARG new_pgno db_pgno_t lu
diff --git a/bdb/hash/hash_conv.c b/bdb/hash/hash_conv.c
index 30d17a6164d..a93e56a2ee4 100644
--- a/bdb/hash/hash_conv.c
+++ b/bdb/hash/hash_conv.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_conv.c,v 11.5 2000/03/31 00:30:32 ubell Exp $";
+static const char revid[] = "$Id: hash_conv.c,v 11.13 2002/08/06 05:34:35 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -15,20 +15,21 @@ static const char revid[] = "$Id: hash_conv.c,v 11.5 2000/03/31 00:30:32 ubell E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "hash.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/hash.h"
/*
* __ham_pgin --
* Convert host-specific page layout from the host-independent format
* stored on disk.
*
- * PUBLIC: int __ham_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __ham_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__ham_pgin(dbenv, pg, pp, cookie)
+__ham_pgin(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -45,16 +46,16 @@ __ham_pgin(dbenv, pg, pp, cookie)
* initialize the rest of the page and return.
*/
if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) {
- P_INIT(pp, pginfo->db_pagesize,
+ P_INIT(pp, (db_indx_t)pginfo->db_pagesize,
pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
return (0);
}
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
return (h->type == P_HASHMETA ? __ham_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1));
}
/*
@@ -62,11 +63,12 @@ __ham_pgin(dbenv, pg, pp, cookie)
* Convert host-specific page layout to the host-independent format
* stored on disk.
*
- * PUBLIC: int __ham_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __ham_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__ham_pgout(dbenv, pg, pp, cookie)
+__ham_pgout(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -75,12 +77,12 @@ __ham_pgout(dbenv, pg, pp, cookie)
PAGE *h;
pginfo = (DB_PGINFO *)cookie->data;
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
h = pp;
return (h->type == P_HASHMETA ? __ham_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0));
}
/*
@@ -108,5 +110,7 @@ __ham_mswap(pg)
SWAP32(p); /* h_charkey */
for (i = 0; i < NCACHED; ++i)
SWAP32(p); /* spares */
+ p += 59 * sizeof(u_int32_t); /* unusued */
+ SWAP32(p); /* crypto_magic */
return (0);
}
diff --git a/bdb/hash/hash_dup.c b/bdb/hash/hash_dup.c
index f5fbf4f472f..ec70e519d54 100644
--- a/bdb/hash/hash_dup.c
+++ b/bdb/hash/hash_dup.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -38,20 +38,14 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_dup.c,v 11.49 2000/12/21 21:54:35 margo Exp $";
+static const char revid[] = "$Id: hash_dup.c,v 11.76 2002/08/06 05:34:40 bostic Exp $";
#endif /* not lint */
/*
* PACKAGE: hashing
*
* DESCRIPTION:
- * Manipulation of duplicates for the hash package.
- *
- * ROUTINES:
- *
- * External
- * __add_dup
- * Internal
+ * Manipulation of duplicates for the hash package.
*/
#ifndef NO_SYSTEM_INCLUDES
@@ -61,13 +55,15 @@ static const char revid[] = "$Id: hash_dup.c,v 11.49 2000/12/21 21:54:35 margo E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
-#include "btree.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
+#include "dbinc/btree.h"
+static int __ham_c_chgpg __P((DBC *,
+ db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
static int __ham_check_move __P((DBC *, u_int32_t));
static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t));
+static int __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t));
/*
* Called from hash_access to add a duplicate key. nval is the new
@@ -92,13 +88,15 @@ __ham_add_dup(dbc, nval, flags, pgnop)
db_pgno_t *pgnop;
{
DB *dbp;
- HASH_CURSOR *hcp;
DBT pval, tmp_val;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
u_int32_t add_bytes, new_size;
int cmp, ret;
u_int8_t *hk;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
DB_ASSERT(flags != DB_CURRENT);
@@ -117,12 +115,12 @@ __ham_add_dup(dbc, nval, flags, pgnop)
* hcp->dndx is the first free ndx or the index of the
* current pointer into the duplicate set.
*/
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
/* Add the len bytes to the current singleton. */
if (HPAGE_PTYPE(hk) != H_DUPLICATE)
add_bytes += DUP_SIZE(0);
new_size =
- LEN_HKEYDATA(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) +
+ LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) +
add_bytes;
/*
@@ -132,7 +130,7 @@ __ham_add_dup(dbc, nval, flags, pgnop)
*/
if (HPAGE_PTYPE(hk) != H_OFFDUP &&
(HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) ||
- add_bytes > P_FREESPACE(hcp->page))) {
+ add_bytes > P_FREESPACE(dbp, hcp->page))) {
if ((ret = __ham_dup_convert(dbc)) != 0)
return (ret);
@@ -145,14 +143,14 @@ __ham_add_dup(dbc, nval, flags, pgnop)
if (HPAGE_PTYPE(hk) != H_DUPLICATE) {
pval.flags = 0;
pval.data = HKEYDATA_DATA(hk);
- pval.size = LEN_HDATA(hcp->page, dbp->pgsize,
+ pval.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize,
hcp->indx);
if ((ret = __ham_make_dup(dbp->dbenv,
- &pval, &tmp_val, &dbc->rdata.data,
- &dbc->rdata.ulen)) != 0 || (ret =
+ &pval, &tmp_val, &dbc->my_rdata.data,
+ &dbc->my_rdata.ulen)) != 0 || (ret =
__ham_replpair(dbc, &tmp_val, 1)) != 0)
return (ret);
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
HPAGE_PTYPE(hk) = H_DUPLICATE;
/*
@@ -167,7 +165,7 @@ __ham_add_dup(dbc, nval, flags, pgnop)
/* Now make the new entry a duplicate. */
if ((ret = __ham_make_dup(dbp->dbenv, nval,
- &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
+ &tmp_val, &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0)
return (ret);
tmp_val.dlen = 0;
@@ -176,13 +174,14 @@ __ham_add_dup(dbc, nval, flags, pgnop)
case DB_KEYLAST:
case DB_NODUPDATA:
if (dbp->dup_compare != NULL) {
- __ham_dsearch(dbc, nval, &tmp_val.doff, &cmp);
+ __ham_dsearch(dbc,
+ nval, &tmp_val.doff, &cmp, flags);
/* dup dups are not supported w/ sorted dups */
if (cmp == 0)
return (__db_duperr(dbp, flags));
} else {
- hcp->dup_tlen = LEN_HDATA(hcp->page,
+ hcp->dup_tlen = LEN_HDATA(dbp, hcp->page,
dbp->pgsize, hcp->indx);
hcp->dup_len = nval->size;
F_SET(hcp, H_ISDUP);
@@ -203,8 +202,7 @@ __ham_add_dup(dbc, nval, flags, pgnop)
/* Add the duplicate. */
ret = __ham_replpair(dbc, &tmp_val, 0);
if (ret == 0)
- ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY);
-
+ ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY);
if (ret != 0)
return (ret);
@@ -213,12 +211,12 @@ __ham_add_dup(dbc, nval, flags, pgnop)
case DB_AFTER:
hcp->dup_off += DUP_SIZE(hcp->dup_len);
hcp->dup_len = nval->size;
- hcp->dup_tlen += DUP_SIZE(nval->size);
+ hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size);
break;
case DB_KEYFIRST:
case DB_KEYLAST:
case DB_BEFORE:
- hcp->dup_tlen += DUP_SIZE(nval->size);
+ hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size);
hcp->dup_len = nval->size;
break;
}
@@ -230,8 +228,8 @@ __ham_add_dup(dbc, nval, flags, pgnop)
* If we get here, then we're on duplicate pages; set pgnop and
* return so the common code can handle it.
*/
- memcpy(pgnop,
- HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)), sizeof(db_pgno_t));
+ memcpy(pgnop, HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
+ sizeof(db_pgno_t));
return (ret);
}
@@ -245,19 +243,21 @@ int
__ham_dup_convert(dbc)
DBC *dbc;
{
+ BOVERFLOW bo;
DB *dbp;
DBC **hcs;
+ DBT dbt;
DB_LSN lsn;
- PAGE *dp;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
- BOVERFLOW bo;
- DBT dbt;
HOFFPAGE ho;
+ PAGE *dp;
db_indx_t i, len, off;
int c, ret, t_ret;
u_int8_t *p, *pend;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
/*
@@ -274,24 +274,24 @@ __ham_dup_convert(dbc)
*/
if ((ret = __ham_get_clist(dbp,
PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0)
- return (ret);
+ goto err;
/*
* Now put the duplicates onto the new page.
*/
dbt.flags = 0;
- switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) {
+ switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
case H_KEYDATA:
/* Simple case, one key on page; move it to dup page. */
- dbt.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx);
- dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
+ dbt.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
+ dbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
ret = __db_pitem(dbc,
dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
goto finish;
case H_OFFPAGE:
/* Simple case, one key on page; move it to dup page. */
- memcpy(&ho,
- P_ENTRY(hcp->page, H_DATAINDEX(hcp->indx)), HOFFPAGE_SIZE);
+ memcpy(&ho, P_ENTRY(dbp, hcp->page, H_DATAINDEX(hcp->indx)),
+ HOFFPAGE_SIZE);
UMRW_SET(bo.unused1);
B_TSET(bo.type, ho.type, 0);
UMRW_SET(bo.unused2);
@@ -301,17 +301,15 @@ __ham_dup_convert(dbc)
dbt.data = &bo;
ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL);
-
finish: if (ret == 0) {
- memp_fset(dbp->mpf, dp, DB_MPOOL_DIRTY);
- /*
- * Update any other cursors
- */
- if (hcs != NULL && DB_LOGGING(dbc)
- && IS_SUBTRANSACTION(dbc->txn)) {
- if ((ret = __ham_chgpg_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_HAM_DUP, PGNO(hcp->page),
+ if ((ret = mpf->set(mpf, dp, DB_MPOOL_DIRTY)) != 0)
+ break;
+
+ /* Update any other cursors. */
+ if (hcs != NULL && DBC_LOGGING(dbc) &&
+ IS_SUBTRANSACTION(dbc->txn)) {
+ if ((ret = __ham_chgpg_log(dbp, dbc->txn,
+ &lsn, 0, DB_HAM_DUP, PGNO(hcp->page),
PGNO(dp), hcp->indx, 0)) != 0)
break;
}
@@ -319,14 +317,12 @@ finish: if (ret == 0) {
if ((ret = __ham_dcursor(hcs[c],
PGNO(dp), 0)) != 0)
break;
-
}
break;
-
case H_DUPLICATE:
- p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
+ p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
pend = p +
- LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx);
+ LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
/*
* We need to maintain the duplicate cursor position.
@@ -344,39 +340,48 @@ finish: if (ret == 0) {
if ((ret = __db_pitem(dbc, dp,
i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0)
break;
- /*
- * Update any other cursors
- */
+
+ /* Update any other cursors */
+ if (hcs != NULL && DBC_LOGGING(dbc) &&
+ IS_SUBTRANSACTION(dbc->txn)) {
+ if ((ret = __ham_chgpg_log(dbp, dbc->txn,
+ &lsn, 0, DB_HAM_DUP, PGNO(hcp->page),
+ PGNO(dp), hcp->indx, i)) != 0)
+ break;
+ }
for (c = 0; hcs != NULL && hcs[c] != NULL; c++)
if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off
== off && (ret = __ham_dcursor(hcs[c],
PGNO(dp), i)) != 0)
- goto out;
+ goto err;
off += len + 2 * sizeof(db_indx_t);
}
-out: break;
-
+ break;
default:
- ret = __db_pgfmt(dbp, (u_long)hcp->pgno);
+ ret = __db_pgfmt(dbp->dbenv, (u_long)hcp->pgno);
break;
}
- if (ret == 0) {
- /*
- * Now attach this to the source page in place of
- * the old duplicate item.
- */
- __ham_move_offpage(dbc, hcp->page,
+
+ /*
+ * Now attach this to the source page in place of the old duplicate
+ * item.
+ */
+ if (ret == 0)
+ ret = __ham_move_offpage(dbc, hcp->page,
(u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp));
- ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY);
- if ((t_ret = memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY)) != 0)
- ret = t_ret;
+err: if (ret == 0)
+ ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY);
+
+ if ((t_ret =
+ mpf->put(mpf, dp, ret == 0 ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if (ret == 0)
hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0;
- } else
- (void)__db_free(dbc, dp);
if (hcs != NULL)
- __os_free(hcs, 0);
+ __os_free(dbp->dbenv, hcs);
return (ret);
}
@@ -444,9 +449,10 @@ __ham_check_move(dbc, add_len)
u_int32_t add_len;
{
DB *dbp;
- HASH_CURSOR *hcp;
DBT k, d;
DB_LSN new_lsn;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
PAGE *next_pagep;
db_pgno_t next_pgno;
u_int32_t new_datalen, old_len, rectype;
@@ -454,9 +460,10 @@ __ham_check_move(dbc, add_len)
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
/*
* If the item is already off page duplicates or an offpage item,
@@ -465,7 +472,7 @@ __ham_check_move(dbc, add_len)
if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE)
return (0);
- old_len = LEN_HITEM(hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx));
+ old_len = LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx));
new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
if (HPAGE_PTYPE(hk) != H_DUPLICATE)
new_datalen += DUP_SIZE(0);
@@ -479,10 +486,10 @@ __ham_check_move(dbc, add_len)
* If neither of these is true, then we can return.
*/
if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE ||
- HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->page)))
+ HOFFDUP_SIZE - old_len <= P_FREESPACE(dbp, hcp->page)))
return (0);
- if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(hcp->page))
+ if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(dbp, hcp->page))
return (0);
/*
@@ -494,20 +501,20 @@ __ham_check_move(dbc, add_len)
new_datalen = ISBIG(hcp, new_datalen) ?
HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
- new_datalen += LEN_HITEM(hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx));
+ new_datalen += LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx));
next_pagep = NULL;
for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID;
next_pgno = NEXT_PGNO(next_pagep)) {
if (next_pagep != NULL &&
- (ret = memp_fput(dbp->mpf, next_pagep, 0)) != 0)
+ (ret = mpf->put(mpf, next_pagep, 0)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf,
+ if ((ret = mpf->get(mpf,
&next_pgno, DB_MPOOL_CREATE, &next_pagep)) != 0)
return (ret);
- if (P_FREESPACE(next_pagep) >= new_datalen)
+ if (P_FREESPACE(dbp, next_pagep) >= new_datalen)
break;
}
@@ -517,58 +524,58 @@ __ham_check_move(dbc, add_len)
return (ret);
/* Add new page at the end of the chain. */
- if (P_FREESPACE(next_pagep) < new_datalen && (ret =
+ if (P_FREESPACE(dbp, next_pagep) < new_datalen && (ret =
__ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) {
- (void)memp_fput(dbp->mpf, next_pagep, 0);
+ (void)mpf->put(mpf, next_pagep, 0);
return (ret);
}
/* Copy the item to the new page. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
rectype = PUTPAIR;
k.flags = 0;
d.flags = 0;
if (HPAGE_PTYPE(
- H_PAIRKEY(hcp->page, hcp->indx)) == H_OFFPAGE) {
+ H_PAIRKEY(dbp, hcp->page, hcp->indx)) == H_OFFPAGE) {
rectype |= PAIR_KEYMASK;
- k.data = H_PAIRKEY(hcp->page, hcp->indx);
+ k.data = H_PAIRKEY(dbp, hcp->page, hcp->indx);
k.size = HOFFPAGE_SIZE;
} else {
k.data =
- HKEYDATA_DATA(H_PAIRKEY(hcp->page, hcp->indx));
- k.size = LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx);
+ HKEYDATA_DATA(H_PAIRKEY(dbp, hcp->page, hcp->indx));
+ k.size =
+ LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx);
}
if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
rectype |= PAIR_DATAMASK;
- d.data = H_PAIRDATA(hcp->page, hcp->indx);
+ d.data = H_PAIRDATA(dbp, hcp->page, hcp->indx);
d.size = HOFFPAGE_SIZE;
} else {
- if (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))
+ if (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))
== H_DUPLICATE)
rectype |= PAIR_DUPMASK;
d.data =
- HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
- d.size = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx);
+ HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
+ d.size = LEN_HDATA(dbp, hcp->page,
+ dbp->pgsize, hcp->indx);
}
- if ((ret = __ham_insdel_log(dbp->dbenv,
- dbc->txn, &new_lsn, 0, rectype,
- dbp->log_fileid, PGNO(next_pagep),
+ if ((ret = __ham_insdel_log(dbp,
+ dbc->txn, &new_lsn, 0, rectype, PGNO(next_pagep),
(u_int32_t)NUM_ENT(next_pagep), &LSN(next_pagep),
&k, &d)) != 0) {
- (void)memp_fput(dbp->mpf, next_pagep, 0);
+ (void)mpf->put(mpf, next_pagep, 0);
return (ret);
}
+ } else
+ LSN_NOT_LOGGED(new_lsn);
- /* Move lsn onto page. */
- LSN(next_pagep) = new_lsn; /* Structure assignment. */
- }
+ /* Move lsn onto page. */
+ LSN(next_pagep) = new_lsn; /* Structure assignment. */
- __ham_copy_item(dbp->pgsize,
- hcp->page, H_KEYINDEX(hcp->indx), next_pagep);
- __ham_copy_item(dbp->pgsize,
- hcp->page, H_DATAINDEX(hcp->indx), next_pagep);
+ __ham_copy_item(dbp, hcp->page, H_KEYINDEX(hcp->indx), next_pagep);
+ __ham_copy_item(dbp, hcp->page, H_DATAINDEX(hcp->indx), next_pagep);
/*
* We've just manually inserted a key and set of data onto
@@ -581,7 +588,7 @@ __ham_check_move(dbc, add_len)
* Note that __ham_del_pair should dirty the page we're moving
* the items from, so we need only dirty the new page ourselves.
*/
- if ((ret = memp_fset(dbp->mpf, next_pagep, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, next_pagep, DB_MPOOL_DIRTY)) != 0)
goto out;
/* Update all cursors that used to point to this item. */
@@ -596,12 +603,17 @@ __ham_check_move(dbc, add_len)
* __ham_del_pair decremented nelem. This is incorrect; we
* manually copied the element elsewhere, so the total number
* of elements hasn't changed. Increment it again.
+ *
+ * !!!
+ * Note that we still have the metadata page pinned, and
+ * __ham_del_pair dirtied it, so we don't need to set the dirty
+ * flag again.
*/
if (!STD_LOCKING(dbc))
hcp->hdr->nelem++;
out:
- (void)memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, hcp->page, DB_MPOOL_DIRTY);
hcp->page = next_pagep;
hcp->pgno = PGNO(hcp->page);
hcp->indx = NUM_ENT(hcp->page) - 2;
@@ -620,9 +632,8 @@ out:
* This is really just a special case of __onpage_replace; we should
* probably combine them.
*
- * PUBLIC: void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t));
*/
-void
+static int
__ham_move_offpage(dbc, pagep, ndx, pgno)
DBC *dbc;
PAGE *pagep;
@@ -630,48 +641,51 @@ __ham_move_offpage(dbc, pagep, ndx, pgno)
db_pgno_t pgno;
{
DB *dbp;
- HASH_CURSOR *hcp;
DBT new_dbt;
DBT old_dbt;
HOFFDUP od;
- db_indx_t i;
+ db_indx_t i, *inp;
int32_t shrink;
u_int8_t *src;
+ int ret;
dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
od.type = H_OFFDUP;
UMRW_SET(od.unused[0]);
UMRW_SET(od.unused[1]);
UMRW_SET(od.unused[2]);
od.pgno = pgno;
+ ret = 0;
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
new_dbt.data = &od;
new_dbt.size = HOFFDUP_SIZE;
- old_dbt.data = P_ENTRY(pagep, ndx);
- old_dbt.size = LEN_HITEM(pagep, dbp->pgsize, ndx);
- (void)__ham_replace_log(dbp->dbenv,
- dbc->txn, &LSN(pagep), 0, dbp->log_fileid,
+ old_dbt.data = P_ENTRY(dbp, pagep, ndx);
+ old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx);
+ if ((ret = __ham_replace_log(dbp, dbc->txn, &LSN(pagep), 0,
PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1,
- &old_dbt, &new_dbt, 0);
- }
+ &old_dbt, &new_dbt, 0)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(pagep));
- shrink = LEN_HITEM(pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE;
+ shrink = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE;
+ inp = P_INP(dbp, pagep);
if (shrink != 0) {
/* Copy data. */
src = (u_int8_t *)(pagep) + HOFFSET(pagep);
- memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
+ memmove(src + shrink, src, inp[ndx] - HOFFSET(pagep));
HOFFSET(pagep) += shrink;
/* Update index table. */
for (i = ndx; i < NUM_ENT(pagep); i++)
- pagep->inp[i] += shrink;
+ inp[i] += shrink;
}
/* Now copy the offdup entry onto the page. */
- memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);
+ memcpy(P_ENTRY(dbp, pagep, ndx), &od, HOFFDUP_SIZE);
+ return (ret);
}
/*
@@ -679,13 +693,14 @@ __ham_move_offpage(dbc, pagep, ndx, pgno)
* Locate a particular duplicate in a duplicate set. Make sure that
* we exit with the cursor set appropriately.
*
- * PUBLIC: void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *));
+ * PUBLIC: void __ham_dsearch
+ * PUBLIC: __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t));
*/
void
-__ham_dsearch(dbc, dbt, offp, cmpp)
+__ham_dsearch(dbc, dbt, offp, cmpp, flags)
DBC *dbc;
DBT *dbt;
- u_int32_t *offp;
+ u_int32_t *offp, flags;
int *cmpp;
{
DB *dbp;
@@ -697,25 +712,36 @@ __ham_dsearch(dbc, dbt, offp, cmpp)
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
- if (dbp->dup_compare == NULL)
- func = __bam_defcmp;
- else
- func = dbp->dup_compare;
+ func = dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare;
i = F_ISSET(hcp, H_CONTINUE) ? hcp->dup_off: 0;
- data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) + i;
- hcp->dup_tlen = LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx);
+ data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + i;
+ hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
while (i < hcp->dup_tlen) {
memcpy(&len, data, sizeof(db_indx_t));
data += sizeof(db_indx_t);
cur.data = data;
cur.size = (u_int32_t)len;
+
+ /*
+ * If we find an exact match, we're done. If in a sorted
+ * duplicate set and the item is larger than our test item,
+ * we're done. In the latter case, if permitting partial
+ * matches, it's not a failure.
+ */
*cmpp = func(dbp, dbt, &cur);
- if (*cmpp == 0 || (*cmpp < 0 && dbp->dup_compare != NULL))
+ if (*cmpp == 0)
+ break;
+ if (*cmpp < 0 && dbp->dup_compare != NULL) {
+ if (flags == DB_GET_BOTH_RANGE)
+ *cmpp = 0;
break;
+ }
+
i += len + 2 * sizeof(db_indx_t);
data += len + sizeof(db_indx_t);
}
+
*offp = i;
hcp->dup_off = i;
hcp->dup_len = len;
@@ -727,29 +753,22 @@ __ham_dsearch(dbc, dbt, offp, cmpp)
* __ham_cprint --
* Display the current cursor list.
*
- * PUBLIC: int __ham_cprint __P((DB *));
+ * PUBLIC: void __ham_cprint __P((DBC *));
*/
-int
-__ham_cprint(dbp)
- DB *dbp;
+void
+__ham_cprint(dbc)
+ DBC *dbc;
{
HASH_CURSOR *cp;
- DBC *dbc;
- MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp);
- for (dbc = TAILQ_FIRST(&dbp->active_queue);
- dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
- cp = (HASH_CURSOR *)dbc->internal;
- fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu",
- P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno,
- (u_long)cp->indx);
- if (F_ISSET(cp, H_DELETED))
- fprintf(stderr, " (deleted)");
- fprintf(stderr, "\n");
- }
- MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
+ cp = (HASH_CURSOR *)dbc->internal;
- return (0);
+ fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu",
+ P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno,
+ (u_long)cp->indx);
+ if (F_ISSET(cp, H_DELETED))
+ fprintf(stderr, " (deleted)");
+ fprintf(stderr, "\n");
}
#endif /* DEBUG */
@@ -765,17 +784,17 @@ __ham_dcursor(dbc, pgno, indx)
u_int32_t indx;
{
DB *dbp;
- DBC *dbc_nopd;
HASH_CURSOR *hcp;
BTREE_CURSOR *dcp;
int ret;
dbp = dbc->dbp;
+ hcp = (HASH_CURSOR *)dbc->internal;
- if ((ret = __db_c_newopd(dbc, pgno, &dbc_nopd)) != 0)
+ if ((ret = __db_c_newopd(dbc, pgno, hcp->opd, &hcp->opd)) != 0)
return (ret);
- dcp = (BTREE_CURSOR *)dbc_nopd->internal;
+ dcp = (BTREE_CURSOR *)hcp->opd->internal;
dcp->pgno = pgno;
dcp->indx = indx;
@@ -792,14 +811,81 @@ __ham_dcursor(dbc, pgno, indx)
* Transfer the deleted flag from the top-level cursor to the
* created one.
*/
- hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_DELETED)) {
F_SET(dcp, C_DELETED);
F_CLR(hcp, H_DELETED);
}
- /* Stack the cursors and reset the initial cursor's index. */
- hcp->opd = dbc_nopd;
+ return (0);
+}
+
+/*
+ * __ham_c_chgpg --
+ * Adjust the cursors after moving an item to a new page. We only
+ * move cursors that are pointing at this one item and are not
+ * deleted; since we only touch non-deleted cursors, and since
+ * (by definition) no item existed at the pgno/indx we're moving the
+ * item to, we're guaranteed that all the cursors we affect here or
+ * on abort really do refer to this one item.
+ */
+static int
+__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index)
+ DBC *dbc;
+ db_pgno_t old_pgno, new_pgno;
+ u_int32_t old_index, new_index;
+{
+ DB *dbp, *ldbp;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ DBC *cp;
+ HASH_CURSOR *hcp;
+ int found, ret;
+
+ dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
+
+ my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
+ found = 0;
+
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
+ cp = TAILQ_NEXT(cp, links)) {
+ if (cp == dbc || cp->dbtype != DB_HASH)
+ continue;
+
+ hcp = (HASH_CURSOR *)cp->internal;
+ /*
+ * If a cursor is deleted, it doesn't refer to this
+ * item--it just happens to have the same indx, but
+ * it points to a former neighbor. Don't move it.
+ */
+ if (F_ISSET(hcp, H_DELETED))
+ continue;
+
+ if (hcp->pgno == old_pgno) {
+ if (hcp->indx == old_index) {
+ hcp->pgno = new_pgno;
+ hcp->indx = new_index;
+ } else
+ continue;
+ if (my_txn != NULL && cp->txn != my_txn)
+ found = 1;
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(dbc)) {
+ if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, DB_HAM_CHGPG,
+ old_pgno, new_pgno, old_index, new_index)) != 0)
+ return (ret);
+ }
return (0);
}
diff --git a/bdb/hash/hash_func.c b/bdb/hash/hash_func.c
index 22b4f08ee70..c6cc2ad4460 100644
--- a/bdb/hash/hash_func.c
+++ b/bdb/hash/hash_func.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_func.c,v 11.7 2000/08/16 18:26:19 ubell Exp $";
+static const char revid[] = "$Id: hash_func.c,v 11.12 2002/03/28 19:49:42 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -51,8 +51,6 @@ static const char revid[] = "$Id: hash_func.c,v 11.7 2000/08/16 18:26:19 ubell E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
/*
* __ham_func2 --
@@ -230,6 +228,11 @@ __ham_func5(dbp, key, len)
return (h);
}
+/*
+ * __ham_test --
+ *
+ * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t));
+ */
u_int32_t
__ham_test(dbp, key, len)
DB *dbp;
diff --git a/bdb/hash/hash_meta.c b/bdb/hash/hash_meta.c
index d96a6db3207..9f224454869 100644
--- a/bdb/hash/hash_meta.c
+++ b/bdb/hash/hash_meta.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_meta.c,v 11.10 2000/12/21 21:54:35 margo Exp $";
+static const char revid[] = "$Id: hash_meta.c,v 11.19 2002/06/03 14:22:15 ubell Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,11 +16,10 @@ static const char revid[] = "$Id: hash_meta.c,v 11.10 2000/12/21 21:54:35 margo
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/hash.h"
+#include "dbinc/lock.h"
/*
* Acquire the meta-data page.
@@ -31,30 +30,32 @@ int
__ham_get_meta(dbc)
DBC *dbc;
{
- HASH_CURSOR *hcp;
- HASH *hashp;
DB *dbp;
+ DB_ENV *dbenv;
+ DB_MPOOLFILE *mpf;
+ HASH *hashp;
+ HASH_CURSOR *hcp;
int ret;
- hcp = (HASH_CURSOR *)dbc->internal;
dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
hashp = dbp->h_internal;
+ hcp = (HASH_CURSOR *)dbc->internal;
- if (dbp->dbenv != NULL &&
- STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) {
+ if (dbenv != NULL &&
+ STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) {
dbc->lock.pgno = hashp->meta_pgno;
- if ((ret = lock_get(dbp->dbenv, dbc->locker,
+ if ((ret = dbenv->lock_get(dbenv, dbc->locker,
DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0,
&dbc->lock_dbt, DB_LOCK_READ, &hcp->hlock)) != 0)
return (ret);
}
- if ((ret = memp_fget(dbc->dbp->mpf,
+ if ((ret = mpf->get(mpf,
&hashp->meta_pgno, DB_MPOOL_CREATE, &(hcp->hdr))) != 0 &&
- hcp->hlock.off != LOCK_INVALID) {
- (void)lock_put(dbc->dbp->dbenv, &hcp->hlock);
- hcp->hlock.off = LOCK_INVALID;
- }
+ LOCK_ISSET(hcp->hlock))
+ (void)dbenv->lock_put(dbenv, &hcp->hlock);
return (ret);
}
@@ -68,18 +69,19 @@ int
__ham_release_meta(dbc)
DBC *dbc;
{
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
+ mpf = dbc->dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
if (hcp->hdr)
- (void)memp_fput(dbc->dbp->mpf, hcp->hdr,
+ (void)mpf->put(mpf, hcp->hdr,
F_ISSET(hcp, H_DIRTY) ? DB_MPOOL_DIRTY : 0);
hcp->hdr = NULL;
- if (!F_ISSET(dbc, DBC_RECOVER) &&
- dbc->txn == NULL && hcp->hlock.off != LOCK_INVALID)
- (void)lock_put(dbc->dbp->dbenv, &hcp->hlock);
- hcp->hlock.off = LOCK_INVALID;
+ if (!F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE) &&
+ dbc->txn == NULL && LOCK_ISSET(hcp->hlock))
+ (void)dbc->dbp->dbenv->lock_put(dbc->dbp->dbenv, &hcp->hlock);
F_CLR(hcp, H_DIRTY);
return (0);
@@ -95,6 +97,7 @@ __ham_dirty_meta(dbc)
DBC *dbc;
{
DB *dbp;
+ DB_ENV *dbenv;
DB_LOCK _tmp;
HASH *hashp;
HASH_CURSOR *hcp;
@@ -105,12 +108,13 @@ __ham_dirty_meta(dbc)
hcp = (HASH_CURSOR *)dbc->internal;
ret = 0;
- if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER)) {
+ if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) {
+ dbenv = dbp->dbenv;
dbc->lock.pgno = hashp->meta_pgno;
- if ((ret = lock_get(dbp->dbenv, dbc->locker,
+ if ((ret = dbenv->lock_get(dbenv, dbc->locker,
DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0,
&dbc->lock_dbt, DB_LOCK_WRITE, &_tmp)) == 0) {
- ret = lock_put(dbp->dbenv, &hcp->hlock);
+ ret = dbenv->lock_put(dbenv, &hcp->hlock);
hcp->hlock = _tmp;
}
}
diff --git a/bdb/hash/hash_method.c b/bdb/hash/hash_method.c
index f8239993dc5..9a6bf59536a 100644
--- a/bdb/hash/hash_method.c
+++ b/bdb/hash/hash_method.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_method.c,v 11.7 2000/07/04 18:28:23 bostic Exp $";
+static const char revid[] = "$Id: hash_method.c,v 11.12 2002/03/27 04:32:12 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,8 +16,8 @@ static const char revid[] = "$Id: hash_method.c,v 11.7 2000/07/04 18:28:23 bosti
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
static int __ham_set_h_ffactor __P((DB *, u_int32_t));
static int __ham_set_h_hash
@@ -38,7 +38,7 @@ __ham_db_create(dbp)
int ret;
if ((ret = __os_malloc(dbp->dbenv,
- sizeof(HASH), NULL, &dbp->h_internal)) != 0)
+ sizeof(HASH), &dbp->h_internal)) != 0)
return (ret);
hashp = dbp->h_internal;
@@ -63,7 +63,7 @@ __ham_db_close(dbp)
{
if (dbp->h_internal == NULL)
return (0);
- __os_free(dbp->h_internal, sizeof(HASH));
+ __os_free(dbp->dbenv, dbp->h_internal);
dbp->h_internal = NULL;
return (0);
}
diff --git a/bdb/hash/hash_open.c b/bdb/hash/hash_open.c
new file mode 100644
index 00000000000..041a1df1e7b
--- /dev/null
+++ b/bdb/hash/hash_open.c
@@ -0,0 +1,558 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996-2002
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: hash_open.c,v 11.175 2002/09/04 19:06:44 margo Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
+#include "dbinc/log.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
+#include "dbinc/fop.h"
+
+static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *));
+
+/*
+ * __ham_open --
+ *
+ * PUBLIC: int __ham_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t));
+ */
+int
+__ham_open(dbp, txn, name, base_pgno, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ const char *name;
+ db_pgno_t base_pgno;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ DBC *dbc;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
+ HASH *hashp;
+ int ret, t_ret;
+
+ COMPQUIET(name, NULL);
+ dbenv = dbp->dbenv;
+ dbc = NULL;
+ mpf = dbp->mpf;
+
+ /* Initialize the remaining fields/methods of the DB. */
+ dbp->stat = __ham_stat;
+
+ /*
+ * Get a cursor. If DB_CREATE is specified, we may be creating
+ * pages, and to do that safely in CDB we need a write cursor.
+ * In STD_LOCKING mode, we'll synchronize using the meta page
+ * lock instead.
+ */
+ if ((ret = dbp->cursor(dbp,
+ txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ?
+ DB_WRITECURSOR : 0)) != 0)
+ return (ret);
+
+ hcp = (HASH_CURSOR *)dbc->internal;
+ hashp = dbp->h_internal;
+ hashp->meta_pgno = base_pgno;
+ if ((ret = __ham_get_meta(dbc)) != 0)
+ goto err1;
+
+ /* Initialize the hdr structure. */
+ if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) {
+ /* File exists, verify the data in the header. */
+ if (hashp->h_hash == NULL)
+ hashp->h_hash = hcp->hdr->dbmeta.version < 5
+ ? __ham_func4 : __ham_func5;
+ if (!F_ISSET(dbp, DB_AM_RDONLY) && !IS_RECOVERING(dbenv) &&
+ hashp->h_hash(dbp,
+ CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) {
+ __db_err(dbp->dbenv,
+ "hash: incompatible hash function");
+ ret = EINVAL;
+ goto err2;
+ }
+ if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP))
+ F_SET(dbp, DB_AM_DUP);
+ if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT))
+ F_SET(dbp, DB_AM_DUPSORT);
+ if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB))
+ F_SET(dbp, DB_AM_SUBDB);
+
+ /* We must initialize last_pgno, it could be stale. */
+ if (!F_ISSET(dbp, DB_AM_RDONLY) &&
+ dbp->meta_pgno == PGNO_BASE_MD) {
+ if ((ret = __ham_dirty_meta(dbc)) != 0)
+ goto err2;
+ mpf->last_pgno(mpf, &hcp->hdr->dbmeta.last_pgno);
+ }
+ } else if (!IS_RECOVERING(dbenv) && !F_ISSET(dbp, DB_AM_RECOVER))
+ DB_ASSERT(0);
+
+err2: /* Release the meta data page */
+ if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+/*
+ * __ham_metachk --
+ *
+ * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *));
+ */
+int
+__ham_metachk(dbp, name, hashm)
+ DB *dbp;
+ const char *name;
+ HMETA *hashm;
+{
+ DB_ENV *dbenv;
+ u_int32_t vers;
+ int ret;
+
+ dbenv = dbp->dbenv;
+
+ /*
+ * At this point, all we know is that the magic number is for a Hash.
+ * Check the version, the database may be out of date.
+ */
+ vers = hashm->dbmeta.version;
+ if (F_ISSET(dbp, DB_AM_SWAP))
+ M_32_SWAP(vers);
+ switch (vers) {
+ case 4:
+ case 5:
+ case 6:
+ __db_err(dbenv,
+ "%s: hash version %lu requires a version upgrade",
+ name, (u_long)vers);
+ return (DB_OLD_VERSION);
+ case 7:
+ case 8:
+ break;
+ default:
+ __db_err(dbenv,
+ "%s: unsupported hash version: %lu", name, (u_long)vers);
+ return (EINVAL);
+ }
+
+ /* Swap the page if we need to. */
+ if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0)
+ return (ret);
+
+ /* Check the type. */
+ if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN)
+ return (EINVAL);
+ dbp->type = DB_HASH;
+ DB_ILLEGAL_METHOD(dbp, DB_OK_HASH);
+
+ /*
+ * Check application info against metadata info, and set info, flags,
+ * and type based on metadata info.
+ */
+ if ((ret = __db_fchk(dbenv,
+ "DB->open", hashm->dbmeta.flags,
+ DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0)
+ return (ret);
+
+ if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP))
+ F_SET(dbp, DB_AM_DUP);
+ else
+ if (F_ISSET(dbp, DB_AM_DUP)) {
+ __db_err(dbenv,
+ "%s: DB_DUP specified to open method but not set in database",
+ name);
+ return (EINVAL);
+ }
+
+ if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB))
+ F_SET(dbp, DB_AM_SUBDB);
+ else
+ if (F_ISSET(dbp, DB_AM_SUBDB)) {
+ __db_err(dbenv,
+ "%s: multiple databases specified but not supported in file",
+ name);
+ return (EINVAL);
+ }
+
+ if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) {
+ if (dbp->dup_compare == NULL)
+ dbp->dup_compare = __bam_defcmp;
+ } else
+ if (dbp->dup_compare != NULL) {
+ __db_err(dbenv,
+ "%s: duplicate sort function specified but not set in database",
+ name);
+ return (EINVAL);
+ }
+
+ /* Set the page size. */
+ dbp->pgsize = hashm->dbmeta.pagesize;
+
+ /* Copy the file's ID. */
+ memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);
+
+ return (0);
+}
+
+/*
+ * __ham_init_meta --
+ *
+ * Initialize a hash meta-data page. We assume that the meta-data page is
+ * contiguous with the initial buckets that we create. If that turns out
+ * to be false, we'll fix it up later. Return the initial number of buckets
+ * allocated.
+ */
+static db_pgno_t
+__ham_init_meta(dbp, meta, pgno, lsnp)
+ DB *dbp;
+ HMETA *meta;
+ db_pgno_t pgno;
+ DB_LSN *lsnp;
+{
+ HASH *hashp;
+ db_pgno_t nbuckets;
+ int i;
+ int32_t l2;
+
+ hashp = dbp->h_internal;
+ if (hashp->h_hash == NULL)
+ hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5;
+
+ if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) {
+ hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1;
+ l2 = __db_log2(hashp->h_nelem > 2 ? hashp->h_nelem : 2);
+ } else
+ l2 = 1;
+ nbuckets = (db_pgno_t)(1 << l2);
+
+ memset(meta, 0, sizeof(HMETA));
+ meta->dbmeta.lsn = *lsnp;
+ meta->dbmeta.pgno = pgno;
+ meta->dbmeta.magic = DB_HASHMAGIC;
+ meta->dbmeta.version = DB_HASHVERSION;
+ meta->dbmeta.pagesize = dbp->pgsize;
+ if (F_ISSET(dbp, DB_AM_CHKSUM))
+ FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM);
+ if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
+ meta->dbmeta.encrypt_alg =
+ ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg;
+ DB_ASSERT(meta->dbmeta.encrypt_alg != 0);
+ meta->crypto_magic = meta->dbmeta.magic;
+ }
+ meta->dbmeta.type = P_HASHMETA;
+ meta->dbmeta.free = PGNO_INVALID;
+ meta->dbmeta.last_pgno = pgno;
+ meta->max_bucket = nbuckets - 1;
+ meta->high_mask = nbuckets - 1;
+ meta->low_mask = (nbuckets >> 1) - 1;
+ meta->ffactor = hashp->h_ffactor;
+ meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY));
+ memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+
+ if (F_ISSET(dbp, DB_AM_DUP))
+ F_SET(&meta->dbmeta, DB_HASH_DUP);
+ if (F_ISSET(dbp, DB_AM_SUBDB))
+ F_SET(&meta->dbmeta, DB_HASH_SUBDB);
+ if (dbp->dup_compare != NULL)
+ F_SET(&meta->dbmeta, DB_HASH_DUPSORT);
+
+ /*
+ * Create the first and second buckets pages so that we have the
+ * page numbers for them and we can store that page number in the
+ * meta-data header (spares[0]).
+ */
+ meta->spares[0] = pgno + 1;
+
+ /* Fill in the last fields of the meta data page. */
+ for (i = 1; i <= l2; i++)
+ meta->spares[i] = meta->spares[0];
+ for (; i < NCACHED; i++)
+ meta->spares[i] = PGNO_INVALID;
+
+ return (nbuckets);
+}
+
+/*
+ * __ham_new_file --
+ * Create the necessary pages to begin a new database file. If name
+ * is NULL, then this is an unnamed file, the mpf has been set in the dbp
+ * and we simply create the pages using mpool. In this case, we don't log
+ * because we never have to redo an unnamed create and the undo simply
+ * frees resources.
+ *
+ * This code appears more complex than it is because of the two cases (named
+ * and unnamed). The way to read the code is that for each page being created,
+ * there are three parts: 1) a "get page" chunk (which either uses malloc'd
+ * memory or calls mpf->get), 2) the initialization, and 3) the "put page"
+ * chunk which either does a fop write or an mpf->put.
+ *
+ * PUBLIC: int __ham_new_file __P((DB *, DB_TXN *, DB_FH *, const char *));
+ */
+int
+__ham_new_file(dbp, txn, fhp, name)
+ DB *dbp;
+ DB_TXN *txn;
+ DB_FH *fhp;
+ const char *name;
+{
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ DB_PGINFO pginfo;
+ DBT pdbt;
+ HMETA *meta;
+ PAGE *page;
+ int ret;
+ db_pgno_t lpgno;
+ void *buf;
+
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
+ meta = NULL;
+ page = NULL;
+ memset(&pdbt, 0, sizeof(pdbt));
+
+ /* Build meta-data page. */
+ if (name == NULL) {
+ lpgno = PGNO_BASE_MD;
+ ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &meta);
+ } else {
+ pginfo.db_pagesize = dbp->pgsize;
+ pginfo.type = dbp->type;
+ pginfo.flags =
+ F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
+ pdbt.data = &pginfo;
+ pdbt.size = sizeof(pginfo);
+ ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf);
+ meta = (HMETA *)buf;
+ }
+ if (ret != 0)
+ return (ret);
+
+ LSN_NOT_LOGGED(lsn);
+ lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
+ meta->dbmeta.last_pgno = lpgno;
+
+ if (name == NULL)
+ ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv, txn, name,
+ DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1);
+ }
+ if (ret != 0)
+ goto err;
+ meta = NULL;
+
+ /* Now allocate the final hash bucket. */
+ if (name == NULL) {
+ if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &page)) != 0)
+ goto err;
+ } else {
+#ifdef DIAGNOSTIC
+ memset(buf, dbp->pgsize, 0);
+#endif
+ page = (PAGE *)buf;
+ }
+
+ P_INIT(page, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ LSN_NOT_LOGGED(page->lsn);
+
+ if (name == NULL)
+ ret = mpf->put(mpf, page, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, lpgno, buf, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv, txn, name,
+ DB_APP_DATA, fhp, lpgno * dbp->pgsize, buf, dbp->pgsize, 1);
+ }
+ if (ret != 0)
+ goto err;
+ page = NULL;
+
+err: if (name != NULL)
+ __os_free(dbenv, buf);
+ else {
+ if (meta != NULL)
+ (void)mpf->put(mpf, meta, 0);
+ if (page != NULL)
+ (void)mpf->put(mpf, page, 0);
+ }
+ return (ret);
+}
+
+/*
+ * __ham_new_subdb --
+ * Create the necessary pages to begin a new subdatabase.
+ *
+ * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_TXN *));
+ */
+int
+__ham_new_subdb(mdbp, dbp, txn)
+ DB *mdbp, *dbp;
+ DB_TXN *txn;
+{
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LOCK metalock, mmlock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ DBMETA *mmeta;
+ HMETA *meta;
+ PAGE *h;
+ int i, ret, t_ret;
+ db_pgno_t lpgno, mpgno;
+
+ dbenv = mdbp->dbenv;
+ mpf = mdbp->mpf;
+ dbc = NULL;
+ meta = NULL;
+ mmeta = NULL;
+ LOCK_INIT(metalock);
+ LOCK_INIT(mmlock);
+
+ if ((ret = mdbp->cursor(mdbp, txn,
+ &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0)
+ return (ret);
+
+ /* Get and lock the new meta data page. */
+ if ((ret = __db_lget(dbc,
+ 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ goto err;
+ if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0)
+ goto err;
+
+ /* Initialize the new meta-data page. */
+ lsn = meta->dbmeta.lsn;
+ lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn);
+
+ /*
+ * We are about to allocate a set of contiguous buckets (lpgno
+ * worth). We need to get the master meta-data page to figure
+ * out where these pages are and to allocate them. So, lock and
+ * get the master meta data page.
+ */
+ mpgno = PGNO_BASE_MD;
+ if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0)
+ goto err;
+ if ((ret = mpf->get(mpf, &mpgno, 0, &mmeta)) != 0)
+ goto err;
+
+ /*
+ * Now update the hash meta-data page to reflect where the first
+ * set of buckets are actually located.
+ */
+ meta->spares[0] = mmeta->last_pgno + 1;
+ for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++)
+ meta->spares[i] = meta->spares[0];
+
+ /* The new meta data page is now complete; log it. */
+ if ((ret = __db_log_page(mdbp,
+ txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0)
+ goto err;
+
+ /* Reflect the group allocation. */
+ if (DBENV_LOGGING(dbenv))
+ if ((ret = __ham_groupalloc_log(mdbp, txn,
+ &LSN(mmeta), 0, &LSN(mmeta),
+ meta->spares[0], meta->max_bucket + 1, mmeta->free)) != 0)
+ goto err;
+
+ /* Release the new meta-data page. */
+ if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ meta = NULL;
+
+ mmeta->last_pgno +=lpgno;
+ lpgno = mmeta->last_pgno;
+
+ /* Now allocate the final hash bucket. */
+ if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &h)) != 0)
+ goto err;
+ P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ LSN(h) = LSN(mmeta);
+ if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+
+ /* Now put the master-metadata page back. */
+ if ((ret = mpf->put(mpf, mmeta, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ mmeta = NULL;
+
+err:
+ if (mmeta != NULL)
+ if ((t_ret = mpf->put(mpf, mmeta, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (LOCK_ISSET(mmlock))
+ if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (meta != NULL)
+ if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (LOCK_ISSET(metalock))
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (dbc != NULL)
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
diff --git a/bdb/hash/hash_page.c b/bdb/hash/hash_page.c
index 64f38853284..6788129773f 100644
--- a/bdb/hash/hash_page.c
+++ b/bdb/hash/hash_page.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,23 +43,14 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_page.c,v 11.46 2001/01/11 18:19:51 bostic Exp $";
+static const char revid[] = "$Id: hash_page.c,v 11.87 2002/08/15 02:46:20 bostic Exp $";
#endif /* not lint */
/*
* PACKAGE: hashing
*
* DESCRIPTION:
- * Page manipulation for hashing package.
- *
- * ROUTINES:
- *
- * External
- * __get_page
- * __add_ovflpage
- * __overflow_page
- * Internal
- * open_temp
+ * Page manipulation for hashing package.
*/
#ifndef NO_SYSTEM_INCLUDES
@@ -69,11 +60,13 @@ static const char revid[] = "$Id: hash_page.c,v 11.46 2001/01/11 18:19:51 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "hash.h"
-#include "lock.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/hash.h"
+#include "dbinc/lock.h"
+
+static int __ham_c_delpg
+ __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *));
/*
* PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *));
@@ -104,15 +97,15 @@ __ham_item(dbc, mode, pgnop)
recheck:
/* Check if we are looking for space in which to insert an item. */
- if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID
- && hcp->seek_size < P_FREESPACE(hcp->page))
+ if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID &&
+ hcp->seek_size < P_FREESPACE(dbp, hcp->page))
hcp->seek_found_page = hcp->pgno;
/* Check for off-page duplicates. */
if (hcp->indx < NUM_ENT(hcp->page) &&
- HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
+ HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
memcpy(pgnop,
- HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)),
+ HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
sizeof(db_pgno_t));
F_SET(hcp, H_OK);
return (0);
@@ -126,7 +119,7 @@ recheck:
* pointer to be the beginning of the datum.
*/
memcpy(&hcp->dup_len,
- HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) +
+ HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) +
hcp->dup_off, sizeof(db_indx_t));
if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) {
@@ -153,15 +146,18 @@ int
__ham_item_reset(dbc)
DBC *dbc;
{
- HASH_CURSOR *hcp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
int ret;
- ret = 0;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
+
+ ret = 0;
if (hcp->page != NULL)
- ret = memp_fput(dbp->mpf, hcp->page, 0);
+ ret = mpf->put(mpf, hcp->page, 0);
__ham_item_init(dbc);
return (ret);
@@ -181,8 +177,7 @@ __ham_item_init(dbc)
* If this cursor still holds any locks, we must
* release them if we are not running with transactions.
*/
- if (hcp->lock.off != LOCK_INVALID && dbc->txn == NULL)
- (void)lock_put(dbc->dbp->dbenv, &hcp->lock);
+ (void)__TLPUT(dbc, hcp->lock);
/*
* The following fields must *not* be initialized here
@@ -191,7 +186,7 @@ __ham_item_init(dbc)
*/
hcp->bucket = BUCKET_INVALID;
hcp->lbucket = BUCKET_INVALID;
- hcp->lock.off = LOCK_INVALID;
+ LOCK_INIT(hcp->lock);
hcp->lock_mode = DB_LOCK_NG;
hcp->dup_off = 0;
hcp->dup_len = 0;
@@ -269,8 +264,9 @@ __ham_item_prev(dbc, mode, pgnop)
db_pgno_t next_pgno;
int ret;
- dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
+ dbp = dbc->dbp;
+
/*
* There are 5 cases for backing up in a hash file.
* Case 1: In the middle of a page, no duplicates, just dec the index.
@@ -291,9 +287,10 @@ __ham_item_prev(dbc, mode, pgnop)
* to handle backing up through keys.
*/
if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) {
- if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
+ if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) ==
+ H_OFFDUP) {
memcpy(pgnop,
- HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)),
+ HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
sizeof(db_pgno_t));
F_SET(hcp, H_OK);
return (0);
@@ -302,7 +299,7 @@ __ham_item_prev(dbc, mode, pgnop)
/* Duplicates are on-page. */
if (hcp->dup_off != 0) {
memcpy(&hcp->dup_len, HKEYDATA_DATA(
- H_PAIRDATA(hcp->page, hcp->indx))
+ H_PAIRDATA(dbp, hcp->page, hcp->indx))
+ hcp->dup_off - sizeof(db_indx_t),
sizeof(db_indx_t));
hcp->dup_off -=
@@ -396,7 +393,7 @@ __ham_item_next(dbc, mode, pgnop)
if (F_ISSET(hcp, H_DELETED)) {
if (hcp->indx != NDX_INVALID &&
F_ISSET(hcp, H_ISDUP) &&
- HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx))
+ HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx))
== H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) {
if (F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
@@ -447,7 +444,7 @@ __ham_item_next(dbc, mode, pgnop)
}
/*
- * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
+ * PUBLIC: void __ham_putitem __P((DB *, PAGE *p, const DBT *, int));
*
* This is a little bit sleazy in that we're overloading the meaning
* of the H_OFFPAGE type here. When we recover deletes, we have the
@@ -456,24 +453,27 @@ __ham_item_next(dbc, mode, pgnop)
* an H_KEYDATA around it.
*/
void
-__ham_putitem(p, dbt, type)
+__ham_putitem(dbp, p, dbt, type)
+ DB *dbp;
PAGE *p;
const DBT *dbt;
int type;
{
u_int16_t n, off;
+ db_indx_t *inp;
n = NUM_ENT(p);
+ inp = P_INP(dbp, p);
/* Put the item element on the page. */
if (type == H_OFFPAGE) {
off = HOFFSET(p) - dbt->size;
- HOFFSET(p) = p->inp[n] = off;
- memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
+ HOFFSET(p) = inp[n] = off;
+ memcpy(P_ENTRY(dbp, p, n), dbt->data, dbt->size);
} else {
off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
- HOFFSET(p) = p->inp[n] = off;
- PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
+ HOFFSET(p) = inp[n] = off;
+ PUT_HKEYDATA(P_ENTRY(dbp, p, n), dbt->data, dbt->size, type);
}
/* Adjust page info. */
@@ -481,8 +481,8 @@ __ham_putitem(p, dbt, type)
}
/*
- * PUBLIC: void __ham_reputpair
- * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
+ * PUBLIC: void __ham_reputpair __P((DB *, PAGE *,
+ * PUBLIC: u_int32_t, const DBT *, const DBT *));
*
* This is a special case to restore a key/data pair to its original
* location during recovery. We are guaranteed that the pair fits
@@ -490,17 +490,21 @@ __ham_putitem(p, dbt, type)
* the last pair, the normal insert works).
*/
void
-__ham_reputpair(p, psize, ndx, key, data)
+__ham_reputpair(dbp, p, ndx, key, data)
+ DB *dbp;
PAGE *p;
- u_int32_t psize, ndx;
+ u_int32_t ndx;
const DBT *key, *data;
{
- db_indx_t i, movebytes, newbytes;
+ db_indx_t i, *inp, movebytes, newbytes;
+ size_t psize;
u_int8_t *from;
+ psize = dbp->pgsize;
+ inp = P_INP(dbp, p);
/* First shuffle the existing items up on the page. */
- movebytes =
- (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p);
+ movebytes = (db_indx_t)(
+ (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p));
newbytes = key->size + data->size;
from = (u_int8_t *)p + HOFFSET(p);
memmove(from - newbytes, from, movebytes);
@@ -511,17 +515,17 @@ __ham_reputpair(p, psize, ndx, key, data)
* we are dealing with index 0 (db_indx_t's are unsigned).
*/
for (i = NUM_ENT(p) - 1; ; i-- ) {
- p->inp[i + 2] = p->inp[i] - newbytes;
+ inp[i + 2] = inp[i] - newbytes;
if (i == H_KEYINDEX(ndx))
break;
}
/* Put the key and data on the page. */
- p->inp[H_KEYINDEX(ndx)] =
- (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - key->size;
- p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size;
- memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size);
- memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size);
+ inp[H_KEYINDEX(ndx)] = (db_indx_t)(
+ (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - key->size);
+ inp[H_DATAINDEX(ndx)] = inp[H_KEYINDEX(ndx)] - data->size;
+ memcpy(P_ENTRY(dbp, p, H_KEYINDEX(ndx)), key->data, key->size);
+ memcpy(P_ENTRY(dbp, p, H_DATAINDEX(ndx)), data->data, data->size);
/* Adjust page info. */
HOFFSET(p) -= newbytes;
@@ -537,25 +541,25 @@ __ham_del_pair(dbc, reclaim_page)
int reclaim_page;
{
DB *dbp;
- HASH_CURSOR *hcp;
DBT data_dbt, key_dbt;
- DB_ENV *dbenv;
DB_LSN new_lsn, *n_lsn, tmp_lsn;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
PAGE *n_pagep, *nn_pagep, *p, *p_pagep;
+ db_ham_mode op;
db_indx_t ndx;
db_pgno_t chg_pgno, pgno, tmp_pgno;
int ret, t_ret;
+ u_int32_t order;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
-
- dbenv = dbp->dbenv;
- ndx = hcp->indx;
-
n_pagep = p_pagep = nn_pagep = NULL;
+ ndx = hcp->indx;
- if (hcp->page == NULL && (ret = memp_fget(dbp->mpf,
- &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
+ if (hcp->page == NULL &&
+ (ret = mpf->get(mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
return (ret);
p = hcp->page;
@@ -567,17 +571,17 @@ __ham_del_pair(dbc, reclaim_page)
* entry referring to the big item.
*/
ret = 0;
- if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
- memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
+ if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) {
+ memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_doff(dbc, pgno);
}
if (ret == 0)
- switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
+ switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) {
case H_OFFPAGE:
memcpy(&pgno,
- HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
+ HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_doff(dbc, pgno);
break;
@@ -596,21 +600,21 @@ __ham_del_pair(dbc, reclaim_page)
return (ret);
/* Now log the delete off this page. */
- if (DB_LOGGING(dbc)) {
- key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
- key_dbt.size = LEN_HITEM(p, dbp->pgsize, H_KEYINDEX(ndx));
- data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
- data_dbt.size = LEN_HITEM(p, dbp->pgsize, H_DATAINDEX(ndx));
-
- if ((ret = __ham_insdel_log(dbenv,
- dbc->txn, &new_lsn, 0, DELPAIR,
- dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
+ if (DBC_LOGGING(dbc)) {
+ key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx));
+ key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx));
+ data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx));
+ data_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx));
+
+ if ((ret = __ham_insdel_log(dbp,
+ dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx,
&LSN(p), &key_dbt, &data_dbt)) != 0)
return (ret);
+ } else
+ LSN_NOT_LOGGED(new_lsn);
- /* Move lsn onto page. */
- LSN(p) = new_lsn;
- }
+ /* Move lsn onto page. */
+ LSN(p) = new_lsn;
/* Do the delete. */
__ham_dpair(dbp, p, ndx);
@@ -636,8 +640,11 @@ __ham_del_pair(dbc, reclaim_page)
* XXX
* Perhaps we can retain incremental numbers and apply them later.
*/
- if (!STD_LOCKING(dbc))
+ if (!STD_LOCKING(dbc)) {
--hcp->hdr->nelem;
+ if ((ret = __ham_dirty_meta(dbc)) != 0)
+ return (ret);
+ }
/*
* If we need to reclaim the page, then check if the page is empty.
@@ -650,43 +657,43 @@ __ham_del_pair(dbc, reclaim_page)
if (!reclaim_page ||
NUM_ENT(p) != 0 ||
(PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID))
- return (memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY));
+ return (mpf->set(mpf, p, DB_MPOOL_DIRTY));
if (PREV_PGNO(p) == PGNO_INVALID) {
/*
* First page in chain is empty and we know that there
* are more pages in the chain.
*/
- if ((ret =
- memp_fget(dbp->mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
+ if ((ret = mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
return (ret);
- if (NEXT_PGNO(n_pagep) != PGNO_INVALID &&
- (ret = memp_fget(dbp->mpf, &NEXT_PGNO(n_pagep), 0,
- &nn_pagep)) != 0)
+ if (NEXT_PGNO(n_pagep) != PGNO_INVALID && (ret =
+ mpf->get(mpf, &NEXT_PGNO(n_pagep), 0, &nn_pagep)) != 0)
goto err;
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
key_dbt.data = n_pagep;
key_dbt.size = dbp->pgsize;
- if ((ret = __ham_copypage_log(dbenv,
- dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p),
+ if ((ret = __ham_copypage_log(dbp,
+ dbc->txn, &new_lsn, 0, PGNO(p),
&LSN(p), PGNO(n_pagep), &LSN(n_pagep),
NEXT_PGNO(n_pagep),
nn_pagep == NULL ? NULL : &LSN(nn_pagep),
&key_dbt)) != 0)
goto err;
+ } else
+ LSN_NOT_LOGGED(new_lsn);
+
+ /* Move lsn onto page. */
+ LSN(p) = new_lsn; /* Structure assignment. */
+ LSN(n_pagep) = new_lsn;
+ if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
+ LSN(nn_pagep) = new_lsn;
- /* Move lsn onto page. */
- LSN(p) = new_lsn; /* Structure assignment. */
- LSN(n_pagep) = new_lsn;
- if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
- LSN(nn_pagep) = new_lsn;
- }
if (nn_pagep != NULL) {
PREV_PGNO(nn_pagep) = PGNO(p);
- if ((ret = memp_fput(dbp->mpf,
- nn_pagep, DB_MPOOL_DIRTY)) != 0) {
+ if ((ret =
+ mpf->put(mpf, nn_pagep, DB_MPOOL_DIRTY)) != 0) {
nn_pagep = NULL;
goto err;
}
@@ -703,26 +710,30 @@ __ham_del_pair(dbc, reclaim_page)
* Update cursors to reflect the fact that records
* on the second page have moved to the first page.
*/
- if ((ret = __ham_c_chgpg(dbc,
- PGNO(n_pagep), NDX_INVALID, PGNO(p), NDX_INVALID)) != 0)
- return (ret);
+ if ((ret = __ham_c_delpg(dbc, PGNO(n_pagep),
+ PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0)
+ goto err;
/*
* Update the cursor to reflect its new position.
*/
hcp->indx = 0;
hcp->pgno = PGNO(p);
- if ((ret = memp_fset(dbp->mpf, p, DB_MPOOL_DIRTY)) != 0 ||
- (ret = __db_free(dbc, n_pagep)) != 0)
- return (ret);
+ hcp->order += order;
+
+ if ((ret = mpf->set(mpf, p, DB_MPOOL_DIRTY)) != 0)
+ goto err;
+ if ((ret = __db_free(dbc, n_pagep)) != 0) {
+ n_pagep = NULL;
+ goto err;
+ }
} else {
- if ((ret =
- memp_fget(dbp->mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0)
+ if ((ret = mpf->get(mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0)
goto err;
if (NEXT_PGNO(p) != PGNO_INVALID) {
- if ((ret = memp_fget(dbp->mpf,
- &NEXT_PGNO(p), 0, &n_pagep)) != 0)
+ if ((ret =
+ mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
goto err;
n_lsn = &LSN(n_pagep);
} else {
@@ -734,32 +745,40 @@ __ham_del_pair(dbc, reclaim_page)
if (n_pagep != NULL)
PREV_PGNO(n_pagep) = PGNO(p_pagep);
- if (DB_LOGGING(dbc)) {
- if ((ret = __ham_newpage_log(dbenv,
- dbc->txn, &new_lsn, 0, DELOVFL,
- dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __ham_newpage_log(dbp, dbc->txn,
+ &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep),
PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
goto err;
+ } else
+ LSN_NOT_LOGGED(new_lsn);
+
+ /* Move lsn onto page. */
+ LSN(p_pagep) = new_lsn; /* Structure assignment. */
+ if (n_pagep)
+ LSN(n_pagep) = new_lsn;
+ LSN(p) = new_lsn;
- /* Move lsn onto page. */
- LSN(p_pagep) = new_lsn; /* Structure assignment. */
- if (n_pagep)
- LSN(n_pagep) = new_lsn;
- LSN(p) = new_lsn;
- }
if (NEXT_PGNO(p) == PGNO_INVALID) {
/*
* There is no next page; put the cursor on the
* previous page as if we'd deleted the last item
- * on that page; index greater than number of
- * valid entries and H_DELETED set.
+ * on that page, with index after the last valid
+ * entry.
+ *
+ * The deleted flag was set up above.
*/
hcp->pgno = PGNO(p_pagep);
hcp->indx = NUM_ENT(p_pagep);
- F_SET(hcp, H_DELETED);
+ op = DB_HAM_DELLASTPG;
} else {
+ /*
+ * There is a next page, so put the cursor at
+ * the beginning of it.
+ */
hcp->pgno = NEXT_PGNO(p);
hcp->indx = 0;
+ op = DB_HAM_DELMIDPG;
}
/*
@@ -770,26 +789,28 @@ __ham_del_pair(dbc, reclaim_page)
hcp->page = NULL;
chg_pgno = PGNO(p);
ret = __db_free(dbc, p);
- if ((t_ret = memp_fput(dbp->mpf, p_pagep, DB_MPOOL_DIRTY)) != 0
- && ret == 0)
+ if ((t_ret =
+ mpf->put(mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
- if (n_pagep != NULL && (t_ret = memp_fput(dbp->mpf,
- n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ if (n_pagep != NULL && (t_ret =
+ mpf->put(mpf, n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
if (ret != 0)
return (ret);
- ret = __ham_c_chgpg(dbc,
- chg_pgno, 0, hcp->pgno, hcp->indx);
+ if ((ret = __ham_c_delpg(dbc,
+ chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0)
+ return (ret);
+ hcp->order += order;
}
return (ret);
err: /* Clean up any pages. */
if (n_pagep != NULL)
- (void)memp_fput(dbp->mpf, n_pagep, 0);
+ (void)mpf->put(mpf, n_pagep, 0);
if (nn_pagep != NULL)
- (void)memp_fput(dbp->mpf, nn_pagep, 0);
+ (void)mpf->put(mpf, nn_pagep, 0);
if (p_pagep != NULL)
- (void)memp_fput(dbp->mpf, p_pagep, 0);
+ (void)mpf->put(mpf, p_pagep, 0);
return (ret);
}
@@ -807,12 +828,13 @@ __ham_replpair(dbc, dbt, make_dup)
u_int32_t make_dup;
{
DB *dbp;
- HASH_CURSOR *hcp;
DBT old_dbt, tdata, tmp;
+ DB_ENV *dbenv;
DB_LSN new_lsn;
+ HASH_CURSOR *hcp;
int32_t change; /* XXX: Possible overflow. */
- u_int32_t dup, len, memsize;
- int is_big, ret, type;
+ u_int32_t dup_flag, len, memsize;
+ int beyond_eor, is_big, ret, type;
u_int8_t *beg, *dest, *end, *hk, *src;
void *memp;
@@ -828,6 +850,7 @@ __ham_replpair(dbc, dbt, make_dup)
* add.
*/
dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
hcp = (HASH_CURSOR *)dbc->internal;
/*
@@ -841,19 +864,21 @@ __ham_replpair(dbc, dbt, make_dup)
*/
change = dbt->size - dbt->dlen;
- hk = H_PAIRDATA(hcp->page, hcp->indx);
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
if (is_big)
memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
else
- len = LEN_HKEYDATA(hcp->page,
+ len = LEN_HKEYDATA(dbp, hcp->page,
dbp->pgsize, H_DATAINDEX(hcp->indx));
- if (dbt->doff + dbt->dlen > len)
+ beyond_eor = dbt->doff + dbt->dlen > len;
+ if (beyond_eor)
change += dbt->doff + dbt->dlen - len;
- if (change > (int32_t)P_FREESPACE(hcp->page) || is_big) {
+ if (change > (int32_t)P_FREESPACE(dbp, hcp->page) ||
+ beyond_eor || is_big) {
/*
* Case 3 -- two subcases.
* A. This is not really a partial operation, but an overwrite.
@@ -868,16 +893,16 @@ __ham_replpair(dbc, dbt, make_dup)
memset(&tmp, 0, sizeof(tmp));
if ((ret =
__db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx),
- &tmp, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ &tmp, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
return (ret);
/* Preserve duplicate info. */
- dup = F_ISSET(hcp, H_ISDUP);
+ dup_flag = F_ISSET(hcp, H_ISDUP);
if (dbt->doff == 0 && dbt->dlen == len) {
ret = __ham_del_pair(dbc, 0);
if (ret == 0)
ret = __ham_add_el(dbc,
- &tmp, dbt, dup ? H_DUPLICATE : H_KEYDATA);
+ &tmp, dbt, dup_flag ? H_DUPLICATE : H_KEYDATA);
} else { /* Case B */
type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
HPAGE_PTYPE(hk) : H_KEYDATA;
@@ -891,15 +916,14 @@ __ham_replpair(dbc, dbt, make_dup)
/* Now we can delete the item. */
if ((ret = __ham_del_pair(dbc, 0)) != 0) {
- __os_free(memp, memsize);
+ __os_free(dbenv, memp);
goto err;
}
/* Now shift old data around to make room for new. */
if (change > 0) {
- if ((ret = __os_realloc(dbp->dbenv,
- tdata.size + change,
- NULL, &tdata.data)) != 0)
+ if ((ret = __os_realloc(dbenv,
+ tdata.size + change, &tdata.data)) != 0)
return (ret);
memp = tdata.data;
memsize = tdata.size + change;
@@ -920,9 +944,9 @@ __ham_replpair(dbc, dbt, make_dup)
/* Now add the pair. */
ret = __ham_add_el(dbc, &tmp, &tdata, type);
- __os_free(memp, memsize);
+ __os_free(dbenv, memp);
}
- F_SET(hcp, dup);
+ F_SET(hcp, dup_flag);
err: return (ret);
}
@@ -930,7 +954,7 @@ err: return (ret);
* Set up pointer into existing data. Do it before the log
* message so we can use it inside of the log setup.
*/
- beg = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
+ beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
beg += dbt->doff;
/*
@@ -938,20 +962,22 @@ err: return (ret);
* all the parameters here. Then log the call before moving
* anything around.
*/
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
old_dbt.data = beg;
old_dbt.size = dbt->dlen;
- if ((ret = __ham_replace_log(dbp->dbenv,
- dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->page),
+ if ((ret = __ham_replace_log(dbp,
+ dbc->txn, &new_lsn, 0, PGNO(hcp->page),
(u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page),
(u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
return (ret);
- LSN(hcp->page) = new_lsn; /* Structure assignment. */
- }
+ } else
+ LSN_NOT_LOGGED(new_lsn);
+
+ LSN(hcp->page) = new_lsn; /* Structure assignment. */
- __ham_onpage_replace(hcp->page, dbp->pgsize,
- (u_int32_t)H_DATAINDEX(hcp->indx), (int32_t)dbt->doff, change, dbt);
+ __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx),
+ (int32_t)dbt->doff, change, dbt);
return (0);
}
@@ -967,34 +993,41 @@ err: return (ret);
* off: Offset at which we are beginning the replacement.
* change: the number of bytes (+ or -) that the element is growing/shrinking.
* dbt: the new data that gets written at beg.
- * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
- * PUBLIC: int32_t, DBT *));
+ *
+ * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t,
+ * PUBLIC: int32_t, int32_t, DBT *));
*/
void
-__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
+__ham_onpage_replace(dbp, pagep, ndx, off, change, dbt)
+ DB *dbp;
PAGE *pagep;
- size_t pgsize;
u_int32_t ndx;
int32_t off;
int32_t change;
DBT *dbt;
{
- db_indx_t i;
+ db_indx_t i, *inp;
int32_t len;
+ size_t pgsize;
u_int8_t *src, *dest;
int zero_me;
+ pgsize = dbp->pgsize;
+ inp = P_INP(dbp, pagep);
if (change != 0) {
zero_me = 0;
src = (u_int8_t *)(pagep) + HOFFSET(pagep);
if (off < 0)
- len = pagep->inp[ndx] - HOFFSET(pagep);
- else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
- len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
- LEN_HKEYDATA(pagep, pgsize, ndx) - src;
+ len = inp[ndx] - HOFFSET(pagep);
+ else if ((u_int32_t)off >=
+ LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) {
+ len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx))
+ + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src);
zero_me = 1;
} else
- len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
+ len = (int32_t)(
+ (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) -
+ src);
dest = src - change;
memmove(dest, src, len);
if (zero_me)
@@ -1002,14 +1035,14 @@ __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
/* Now update the indices. */
for (i = ndx; i < NUM_ENT(pagep); i++)
- pagep->inp[i] -= change;
+ inp[i] -= change;
HOFFSET(pagep) -= change;
}
if (off >= 0)
- memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
+ memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off,
dbt->data, dbt->size);
else
- memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
+ memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size);
}
/*
@@ -1022,10 +1055,12 @@ __ham_split_page(dbc, obucket, nbucket)
{
DB *dbp;
DBC **carray;
- HASH_CURSOR *hcp, *cp;
DBT key, page_dbt;
DB_ENV *dbenv;
+ DB_LOCK block;
DB_LSN new_lsn;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp, *cp;
PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
db_indx_t n;
db_pgno_t bucket_pgno, npgno, next_pgno;
@@ -1034,22 +1069,24 @@ __ham_split_page(dbc, obucket, nbucket)
void *big_buf;
dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
+ hcp = (HASH_CURSOR *)dbc->internal;
temp_pagep = old_pagep = new_pagep = NULL;
-
- if ((ret = __ham_get_clist(dbp, obucket, NDX_INVALID, &carray)) != 0)
- return (ret);
+ carray = NULL;
+ LOCK_INIT(block);
bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
- if ((ret = memp_fget(dbp->mpf,
+ if ((ret = __db_lget(dbc,
+ 0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0)
+ goto err;
+ if ((ret = mpf->get(mpf,
&bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0)
goto err;
/* Properly initialize the new bucket page. */
npgno = BUCKET_TO_PAGE(hcp, nbucket);
- if ((ret = memp_fget(dbp->mpf,
- &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0)
+ if ((ret = mpf->get(mpf, &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0)
goto err;
P_INIT(new_pagep,
dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
@@ -1057,33 +1094,35 @@ __ham_split_page(dbc, obucket, nbucket)
temp_pagep = hcp->split_buf;
memcpy(temp_pagep, old_pagep, dbp->pgsize);
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = old_pagep;
- if ((ret = __ham_splitdata_log(dbenv,
- dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD,
+ if ((ret = __ham_splitdata_log(dbp,
+ dbc->txn, &new_lsn, 0, SPLITOLD,
PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
goto err;
- }
+ } else
+ LSN_NOT_LOGGED(new_lsn);
+
+ LSN(old_pagep) = new_lsn; /* Structure assignment. */
P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID,
PGNO_INVALID, 0, P_HASH);
- if (DB_LOGGING(dbc))
- LSN(old_pagep) = new_lsn; /* Structure assignment. */
-
big_len = 0;
big_buf = NULL;
key.flags = 0;
while (temp_pagep != NULL) {
+ if ((ret = __ham_get_clist(dbp,
+ PGNO(temp_pagep), NDX_INVALID, &carray)) != 0)
+ goto err;
+
for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) {
- if ((ret =
- __db_ret(dbp, temp_pagep, H_KEYINDEX(n),
- &key, &big_buf, &big_len)) != 0)
+ if ((ret = __db_ret(dbp, temp_pagep,
+ H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0)
goto err;
- if (__ham_call_hash(dbc, key.data, key.size)
- == obucket)
+ if (__ham_call_hash(dbc, key.data, key.size) == obucket)
pp = &old_pagep;
else
pp = &new_pagep;
@@ -1092,25 +1131,24 @@ __ham_split_page(dbc, obucket, nbucket)
* Figure out how many bytes we need on the new
* page to store the key/data pair.
*/
-
- len = LEN_HITEM(temp_pagep, dbp->pgsize,
+ len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
H_DATAINDEX(n)) +
- LEN_HITEM(temp_pagep, dbp->pgsize,
+ LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
H_KEYINDEX(n)) +
2 * sizeof(db_indx_t);
- if (P_FREESPACE(*pp) < len) {
- if (DB_LOGGING(dbc)) {
+ if (P_FREESPACE(dbp, *pp) < len) {
+ if (DBC_LOGGING(dbc)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = *pp;
- if ((ret = __ham_splitdata_log(
- dbenv, dbc->txn,
- &new_lsn, 0, dbp->log_fileid,
+ if ((ret = __ham_splitdata_log(dbp,
+ dbc->txn, &new_lsn, 0,
SPLITNEW, PGNO(*pp), &page_dbt,
&LSN(*pp))) != 0)
goto err;
- LSN(*pp) = new_lsn;
- }
+ } else
+ LSN_NOT_LOGGED(new_lsn);
+ LSN(*pp) = new_lsn;
if ((ret =
__ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
goto err;
@@ -1122,28 +1160,25 @@ __ham_split_page(dbc, obucket, nbucket)
for (i = 0; carray[i] != NULL; i++) {
cp =
(HASH_CURSOR *)carray[i]->internal;
- if (cp->pgno == PGNO(temp_pagep)
- && cp->indx == n) {
+ if (cp->pgno == PGNO(temp_pagep) &&
+ cp->indx == n) {
cp->pgno = PGNO(*pp);
cp->indx = NUM_ENT(*pp);
found = 1;
}
}
- if (found && DB_LOGGING(dbc)
- && IS_SUBTRANSACTION(dbc->txn)) {
+ if (found && DBC_LOGGING(dbc) &&
+ IS_SUBTRANSACTION(dbc->txn)) {
if ((ret =
- __ham_chgpg_log(dbp->dbenv,
+ __ham_chgpg_log(dbp,
dbc->txn, &new_lsn, 0,
- dbp->log_fileid,
DB_HAM_SPLIT, PGNO(temp_pagep),
PGNO(*pp), n, NUM_ENT(*pp))) != 0)
goto err;
}
}
- __ham_copy_item(dbp->pgsize,
- temp_pagep, H_KEYINDEX(n), *pp);
- __ham_copy_item(dbp->pgsize,
- temp_pagep, H_DATAINDEX(n), *pp);
+ __ham_copy_item(dbp, temp_pagep, H_KEYINDEX(n), *pp);
+ __ham_copy_item(dbp, temp_pagep, H_DATAINDEX(n), *pp);
}
next_pgno = NEXT_PGNO(temp_pagep);
@@ -1156,23 +1191,30 @@ __ham_split_page(dbc, obucket, nbucket)
if (next_pgno == PGNO_INVALID)
temp_pagep = NULL;
- else if ((ret = memp_fget(dbp->mpf,
- &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0)
+ else if ((ret = mpf->get(
+ mpf, &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0)
goto err;
- if (temp_pagep != NULL && DB_LOGGING(dbc)) {
- page_dbt.size = dbp->pgsize;
- page_dbt.data = temp_pagep;
- if ((ret = __ham_splitdata_log(dbenv,
- dbc->txn, &new_lsn, 0, dbp->log_fileid,
- SPLITOLD, PGNO(temp_pagep),
- &page_dbt, &LSN(temp_pagep))) != 0)
- goto err;
+ if (temp_pagep != NULL) {
+ if (DBC_LOGGING(dbc)) {
+ page_dbt.size = dbp->pgsize;
+ page_dbt.data = temp_pagep;
+ if ((ret = __ham_splitdata_log(dbp,
+ dbc->txn, &new_lsn, 0,
+ SPLITOLD, PGNO(temp_pagep),
+ &page_dbt, &LSN(temp_pagep))) != 0)
+ goto err;
+ } else
+ LSN_NOT_LOGGED(new_lsn);
LSN(temp_pagep) = new_lsn;
}
+
+ if (carray != NULL) /* We never knew its size. */
+ __os_free(dbenv, carray);
+ carray = NULL;
}
if (big_buf != NULL)
- __os_free(big_buf, big_len);
+ __os_free(dbenv, big_buf);
/*
* If the original bucket spanned multiple pages, then we've got
@@ -1188,37 +1230,43 @@ __ham_split_page(dbc, obucket, nbucket)
/*
* Write new buckets out.
*/
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = old_pagep;
- if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0,
- dbp->log_fileid, SPLITNEW, PGNO(old_pagep), &page_dbt,
+ if ((ret = __ham_splitdata_log(dbp, dbc->txn,
+ &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt,
&LSN(old_pagep))) != 0)
goto err;
LSN(old_pagep) = new_lsn;
page_dbt.data = new_pagep;
- if ((ret = __ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0,
- dbp->log_fileid, SPLITNEW, PGNO(new_pagep), &page_dbt,
+ if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0,
+ SPLITNEW, PGNO(new_pagep), &page_dbt,
&LSN(new_pagep))) != 0)
goto err;
LSN(new_pagep) = new_lsn;
+ } else {
+ LSN_NOT_LOGGED(LSN(old_pagep));
+ LSN_NOT_LOGGED(LSN(new_pagep));
}
- ret = memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY);
- if ((t_ret = memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY)) != 0
- && ret == 0)
+
+ ret = mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY);
+ if ((t_ret =
+ mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
if (0) {
err: if (old_pagep != NULL)
- (void)memp_fput(dbp->mpf, old_pagep, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY);
if (new_pagep != NULL)
- (void)memp_fput(dbp->mpf, new_pagep, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY);
if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
- (void)memp_fput(dbp->mpf, temp_pagep, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, temp_pagep, DB_MPOOL_DIRTY);
}
+ if (LOCK_ISSET(block))
+ __TLPUT(dbc, block);
if (carray != NULL) /* We never knew its size. */
- __os_free(carray, 0);
+ __os_free(dbenv, carray);
return (ret);
}
@@ -1237,11 +1285,12 @@ __ham_add_el(dbc, key, val, type)
const DBT *key, *val;
int type;
{
- DB *dbp;
- HASH_CURSOR *hcp;
const DBT *pkey, *pdata;
+ DB *dbp;
DBT key_dbt, data_dbt;
DB_LSN new_lsn;
+ DB_MPOOLFILE *mpf;
+ HASH_CURSOR *hcp;
HOFFPAGE doff, koff;
db_pgno_t next_pgno, pgno;
u_int32_t data_size, key_size, pairsize, rectype;
@@ -1249,13 +1298,14 @@ __ham_add_el(dbc, key, val, type)
int key_type, data_type;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
do_expand = 0;
- pgno = hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
- hcp->pgno;
- if (hcp->page == NULL && (ret = memp_fget(dbp->mpf, &pgno,
- DB_MPOOL_CREATE, &hcp->page)) != 0)
+ pgno = hcp->seek_found_page != PGNO_INVALID ?
+ hcp->seek_found_page : hcp->pgno;
+ if (hcp->page == NULL &&
+ (ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
return (ret);
key_size = HKEYDATA_PSIZE(key->size);
@@ -1276,21 +1326,20 @@ __ham_add_el(dbc, key, val, type)
* anyway. Check if it's a bigpair that fits or a regular
* pair that fits.
*/
- if (P_FREESPACE(hcp->page) >= pairsize)
+ if (P_FREESPACE(dbp, hcp->page) >= pairsize)
break;
next_pgno = NEXT_PGNO(hcp->page);
- if ((ret =
- __ham_next_cpage(dbc, next_pgno, 0)) != 0)
+ if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
return (ret);
}
/*
* Check if we need to allocate a new page.
*/
- if (P_FREESPACE(hcp->page) < pairsize) {
+ if (P_FREESPACE(dbp, hcp->page) < pairsize) {
do_expand = 1;
if ((ret = __ham_add_ovflpage(dbc,
- (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0)
+ (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0)
return (ret);
hcp->pgno = PGNO(hcp->page);
}
@@ -1334,7 +1383,7 @@ __ham_add_el(dbc, key, val, type)
data_type = type;
}
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
rectype = PUTPAIR;
if (is_databig)
rectype |= PAIR_DATAMASK;
@@ -1343,18 +1392,18 @@ __ham_add_el(dbc, key, val, type)
if (type == H_DUPLICATE)
rectype |= PAIR_DUPMASK;
- if ((ret = __ham_insdel_log(dbp->dbenv, dbc->txn, &new_lsn, 0,
- rectype, dbp->log_fileid, PGNO(hcp->page),
- (u_int32_t)NUM_ENT(hcp->page), &LSN(hcp->page), pkey,
- pdata)) != 0)
+ if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0,
+ rectype, PGNO(hcp->page), (u_int32_t)NUM_ENT(hcp->page),
+ &LSN(hcp->page), pkey, pdata)) != 0)
return (ret);
+ } else
+ LSN_NOT_LOGGED(new_lsn);
- /* Move lsn onto page. */
- LSN(hcp->page) = new_lsn; /* Structure assignment. */
- }
+ /* Move lsn onto page. */
+ LSN(hcp->page) = new_lsn; /* Structure assignment. */
- __ham_putitem(hcp->page, pkey, key_type);
- __ham_putitem(hcp->page, pdata, data_type);
+ __ham_putitem(dbp, hcp->page, pkey, key_type);
+ __ham_putitem(dbp, hcp->page, pdata, data_type);
/*
* For splits, we are going to update item_info's page number
@@ -1369,8 +1418,11 @@ __ham_add_el(dbc, key, val, type)
* XXX
* Maybe keep incremental numbers here.
*/
- if (!STD_LOCKING(dbc))
+ if (!STD_LOCKING(dbc)) {
hcp->hdr->nelem++;
+ if ((ret = __ham_dirty_meta(dbc)) != 0)
+ return (ret);
+ }
if (do_expand || (hcp->hdr->ffactor != 0 &&
(u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor))
@@ -1384,28 +1436,32 @@ __ham_add_el(dbc, key, val, type)
* H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
* do not need to do any logging here.
*
- * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
+ * PUBLIC: void __ham_copy_item __P((DB *, PAGE *, u_int32_t, PAGE *));
*/
void
-__ham_copy_item(pgsize, src_page, src_ndx, dest_page)
- size_t pgsize;
+__ham_copy_item(dbp, src_page, src_ndx, dest_page)
+ DB *dbp;
PAGE *src_page;
u_int32_t src_ndx;
PAGE *dest_page;
{
u_int32_t len;
+ size_t pgsize;
void *src, *dest;
+ db_indx_t *inp;
+ pgsize = dbp->pgsize;
+ inp = P_INP(dbp, dest_page);
/*
* Copy the key and data entries onto this new page.
*/
- src = P_ENTRY(src_page, src_ndx);
+ src = P_ENTRY(dbp, src_page, src_ndx);
/* Set up space on dest. */
- len = LEN_HITEM(src_page, pgsize, src_ndx);
+ len = (u_int32_t)LEN_HITEM(dbp, src_page, pgsize, src_ndx);
HOFFSET(dest_page) -= len;
- dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
- dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
+ inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
+ dest = P_ENTRY(dbp, dest_page, NUM_ENT(dest_page));
NUM_ENT(dest_page)++;
memcpy(dest, src, len);
@@ -1414,8 +1470,8 @@ __ham_copy_item(pgsize, src_page, src_ndx, dest_page)
/*
*
* Returns:
- * pointer on success
- * NULL on error
+ * pointer on success
+ * NULL on error
*
* PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
*/
@@ -1427,31 +1483,33 @@ __ham_add_ovflpage(dbc, pagep, release, pp)
PAGE **pp;
{
DB *dbp;
- HASH_CURSOR *hcp;
DB_LSN new_lsn;
+ DB_MPOOLFILE *mpf;
PAGE *new_pagep;
int ret;
dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
+ mpf = dbp->mpf;
if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0)
return (ret);
- if (DB_LOGGING(dbc)) {
- if ((ret = __ham_newpage_log(dbp->dbenv, dbc->txn, &new_lsn, 0,
- PUTOVFL, dbp->log_fileid, PGNO(pagep), &LSN(pagep),
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0,
+ PUTOVFL, PGNO(pagep), &LSN(pagep),
PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
return (ret);
+ } else
+ LSN_NOT_LOGGED(new_lsn);
- /* Move lsn onto page. */
- LSN(pagep) = LSN(new_pagep) = new_lsn;
- }
+ /* Move lsn onto page. */
+ LSN(pagep) = LSN(new_pagep) = new_lsn;
NEXT_PGNO(pagep) = PGNO(new_pagep);
+
PREV_PGNO(new_pagep) = PGNO(pagep);
if (release)
- ret = memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+ ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY);
*pp = new_pagep;
return (ret);
@@ -1467,10 +1525,12 @@ __ham_get_cpage(dbc, mode)
{
DB *dbp;
DB_LOCK tmp_lock;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
ret = 0;
@@ -1485,25 +1545,22 @@ __ham_get_cpage(dbc, mode)
* 4. If there is a lock, but it's for a different bucket, then we need
* to release the existing lock and get a new lock.
*/
- tmp_lock.off = LOCK_INVALID;
+ LOCK_INIT(tmp_lock);
if (STD_LOCKING(dbc)) {
- if (hcp->lock.off != LOCK_INVALID &&
- hcp->lbucket != hcp->bucket) { /* Case 4 */
- if (dbc->txn == NULL &&
- (ret = lock_put(dbp->dbenv, &hcp->lock)) != 0)
- return (ret);
- hcp->lock.off = LOCK_INVALID;
- }
- if ((hcp->lock.off != LOCK_INVALID &&
+ if (hcp->lbucket != hcp->bucket && /* Case 4 */
+ (ret = __TLPUT(dbc, hcp->lock)) != 0)
+ return (ret);
+
+ if ((LOCK_ISSET(hcp->lock) &&
(hcp->lock_mode == DB_LOCK_READ &&
mode == DB_LOCK_WRITE))) {
/* Case 3. */
tmp_lock = hcp->lock;
- hcp->lock.off = LOCK_INVALID;
+ LOCK_INIT(hcp->lock);
}
/* Acquire the lock. */
- if (hcp->lock.off == LOCK_INVALID)
+ if (!LOCK_ISSET(hcp->lock))
/* Cases 1, 3, and 4. */
if ((ret = __ham_lock_bucket(dbc, mode)) != 0)
return (ret);
@@ -1511,17 +1568,18 @@ __ham_get_cpage(dbc, mode)
if (ret == 0) {
hcp->lock_mode = mode;
hcp->lbucket = hcp->bucket;
- if (tmp_lock.off != LOCK_INVALID)
+ if (LOCK_ISSET(tmp_lock))
/* Case 3: release the original lock. */
- ret = lock_put(dbp->dbenv, &tmp_lock);
- } else if (tmp_lock.off != LOCK_INVALID)
+ ret =
+ dbp->dbenv->lock_put(dbp->dbenv, &tmp_lock);
+ } else if (LOCK_ISSET(tmp_lock))
hcp->lock = tmp_lock;
}
if (ret == 0 && hcp->page == NULL) {
if (hcp->pgno == PGNO_INVALID)
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
- if ((ret = memp_fget(dbp->mpf,
+ if ((ret = mpf->get(mpf,
&hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
return (ret);
}
@@ -1543,18 +1601,21 @@ __ham_next_cpage(dbc, pgno, dirty)
int dirty;
{
DB *dbp;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
PAGE *p;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
hcp = (HASH_CURSOR *)dbc->internal;
- if (hcp->page != NULL && (ret = memp_fput(dbp->mpf,
- hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0)
+ if (hcp->page != NULL &&
+ (ret = mpf->put(mpf, hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0)
return (ret);
+ hcp->page = NULL;
- if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0)
return (ret);
hcp->page = p;
@@ -1576,7 +1637,7 @@ __ham_lock_bucket(dbc, mode)
db_lockmode_t mode;
{
HASH_CURSOR *hcp;
- u_int32_t flags;
+ db_pgno_t pgno;
int gotmeta, ret;
hcp = (HASH_CURSOR *)dbc->internal;
@@ -1584,17 +1645,12 @@ __ham_lock_bucket(dbc, mode)
if (gotmeta)
if ((ret = __ham_get_meta(dbc)) != 0)
return (ret);
- dbc->lock.pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
+ pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
if (gotmeta)
if ((ret = __ham_release_meta(dbc)) != 0)
return (ret);
- flags = 0;
- if (DB_NONBLOCK(dbc))
- LF_SET(DB_LOCK_NOWAIT);
-
- ret = lock_get(dbc->dbp->dbenv,
- dbc->locker, flags, &dbc->lock_dbt, mode, &hcp->lock);
+ ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock);
hcp->lock_mode = mode;
return (ret);
@@ -1606,6 +1662,9 @@ __ham_lock_bucket(dbc, mode)
* represents. The caller is responsible for freeing up duplicates
* or offpage entries that might be referenced by this pair.
*
+ * Recovery assumes that this may be called without the metadata
+ * page pinned.
+ *
* PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
*/
void
@@ -1614,15 +1673,16 @@ __ham_dpair(dbp, p, indx)
PAGE *p;
u_int32_t indx;
{
- db_indx_t delta, n;
+ db_indx_t delta, n, *inp;
u_int8_t *dest, *src;
+ inp = P_INP(dbp, p);
/*
* Compute "delta", the amount we have to shift all of the
* offsets. To find the delta, we just need to calculate
* the size of the pair of elements we are removing.
*/
- delta = H_PAIRSIZE(p, dbp->pgsize, indx);
+ delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx);
/*
* The hard case: we want to remove something other than
@@ -1641,7 +1701,7 @@ __ham_dpair(dbp, p, indx)
* be an overlapping copy, so we have to use memmove.
*/
dest = src + delta;
- memmove(dest, src, p->inp[H_DATAINDEX(indx)] - HOFFSET(p));
+ memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p));
}
/* Adjust page metadata. */
@@ -1650,6 +1710,153 @@ __ham_dpair(dbp, p, indx)
/* Adjust the offsets. */
for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++)
- p->inp[n] = p->inp[n + 2] + delta;
+ inp[n] = inp[n + 2] + delta;
+
+}
+
+/*
+ * __ham_c_delpg --
+ *
+ * Adjust the cursors after we've emptied a page in a bucket, taking
+ * care that when we move cursors pointing to deleted items, their
+ * orders don't collide with the orders of cursors on the page we move
+ * them to (since after this function is called, cursors with the same
+ * index on the two pages will be otherwise indistinguishable--they'll
+ * all have pgno new_pgno). There are three cases:
+ *
+ * 1) The emptied page is the first page in the bucket. In this
+ * case, we've copied all the items from the second page into the
+ * first page, so the first page is new_pgno and the second page is
+ * old_pgno. new_pgno is empty, but can have deleted cursors
+ * pointing at indx 0, so we need to be careful of the orders
+ * there. This is DB_HAM_DELFIRSTPG.
+ *
+ * 2) The page is somewhere in the middle of a bucket. Our caller
+ * can just delete such a page, so it's old_pgno. old_pgno is
+ * empty, but may have deleted cursors pointing at indx 0, so we
+ * need to be careful of indx 0 when we move those cursors to
+ * new_pgno. This is DB_HAM_DELMIDPG.
+ *
+ * 3) The page is the last in a bucket. Again the empty page is
+ * old_pgno, and again it should only have cursors that are deleted
+ * and at indx == 0. This time, though, there's no next page to
+ * move them to, so we set them to indx == num_ent on the previous
+ * page--and indx == num_ent is the index whose cursors we need to
+ * be careful of. This is DB_HAM_DELLASTPG.
+ */
+static int
+__ham_c_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp)
+ DBC *dbc;
+ db_pgno_t old_pgno, new_pgno;
+ u_int32_t num_ent;
+ db_ham_mode op;
+ u_int32_t *orderp;
+{
+ DB *dbp, *ldbp;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_TXN *my_txn;
+ DBC *cp;
+ HASH_CURSOR *hcp;
+ int found, ret;
+ db_indx_t indx;
+ u_int32_t order;
+
+ /* Which is the worrisome index? */
+ indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0;
+ dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
+
+ my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
+ found = 0;
+
+ MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
+ /*
+ * Find the highest order of any cursor our movement
+ * may collide with.
+ */
+ order = 1;
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
+ cp = TAILQ_NEXT(cp, links)) {
+ if (cp == dbc || cp->dbtype != DB_HASH)
+ continue;
+ hcp = (HASH_CURSOR *)cp->internal;
+ if (hcp->pgno == new_pgno) {
+ if (hcp->indx == indx &&
+ F_ISSET(hcp, H_DELETED) &&
+ hcp->order >= order)
+ order = hcp->order + 1;
+ DB_ASSERT(op != DB_HAM_DELFIRSTPG ||
+ hcp->indx == NDX_INVALID ||
+ (hcp->indx == 0 &&
+ F_ISSET(hcp, H_DELETED)));
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+
+ for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
+ ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
+ ldbp = LIST_NEXT(ldbp, dblistlinks)) {
+ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
+ for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
+ cp = TAILQ_NEXT(cp, links)) {
+ if (cp == dbc || cp->dbtype != DB_HASH)
+ continue;
+
+ hcp = (HASH_CURSOR *)cp->internal;
+
+ if (hcp->pgno == old_pgno) {
+ switch (op) {
+ case DB_HAM_DELFIRSTPG:
+ /*
+ * We're moving all items,
+ * regardless of index.
+ */
+ hcp->pgno = new_pgno;
+
+ /*
+ * But we have to be careful of
+ * the order values.
+ */
+ if (hcp->indx == indx)
+ hcp->order += order;
+ break;
+ case DB_HAM_DELMIDPG:
+ hcp->pgno = new_pgno;
+ DB_ASSERT(hcp->indx == 0 &&
+ F_ISSET(hcp, H_DELETED));
+ hcp->order += order;
+ break;
+ case DB_HAM_DELLASTPG:
+ hcp->pgno = new_pgno;
+ DB_ASSERT(hcp->indx == 0 &&
+ F_ISSET(hcp, H_DELETED));
+ hcp->indx = indx;
+ hcp->order += order;
+ break;
+ default:
+ DB_ASSERT(0);
+ return (__db_panic(dbenv, EINVAL));
+ }
+ if (my_txn != NULL && cp->txn != my_txn)
+ found = 1;
+ }
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ }
+ MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
+
+ if (found != 0 && DBC_LOGGING(dbc)) {
+ if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op,
+ old_pgno, new_pgno, indx, order)) != 0)
+ return (ret);
+ }
+ *orderp = order;
+ return (0);
}
diff --git a/bdb/hash/hash_rec.c b/bdb/hash/hash_rec.c
index ded58c281e9..24d3473c508 100644
--- a/bdb/hash/hash_rec.c
+++ b/bdb/hash/hash_rec.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic Exp $";
+static const char revid[] = "$Id: hash_rec.c,v 11.69 2002/09/03 14:12:49 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,15 +53,12 @@ static const char revid[] = "$Id: hash_rec.c,v 11.34 2001/01/11 18:19:52 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "hash.h"
-#include "lock.h"
-#include "log.h"
-#include "mp.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
+#include "dbinc/log.h"
-static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *));
+static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *, DB_LSN *));
/*
* __ham_insdel_recover --
@@ -82,16 +79,16 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- u_int32_t opcode;
- int cmp_n, cmp_p, flags, getmeta, ret, type;
+ u_int32_t flags, opcode;
+ int cmp_n, cmp_p, ret, type;
+ pagep = NULL;
COMPQUIET(info, NULL);
- getmeta = 0;
REC_PRINT(__ham_insdel_print);
REC_INTRO(__ham_insdel_read, 1);
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -100,15 +97,11 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
* don't bother creating a page.
*/
goto done;
- } else if ((ret = memp_fget(mpf, &argp->pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- getmeta = 1;
-
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
@@ -135,7 +128,7 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
*/
if (opcode != DELPAIR ||
argp->ndx == (u_int32_t)NUM_ENT(pagep)) {
- __ham_putitem(pagep, &argp->key,
+ __ham_putitem(file_dbp, pagep, &argp->key,
DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
H_OFFPAGE : H_KEYDATA);
@@ -145,31 +138,32 @@ __ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
type = H_OFFPAGE;
else
type = H_KEYDATA;
- __ham_putitem(pagep, &argp->data, type);
+ __ham_putitem(file_dbp, pagep, &argp->data, type);
} else
- (void)__ham_reputpair(pagep, file_dbp->pgsize,
+ (void)__ham_reputpair(file_dbp, pagep,
argp->ndx, &argp->key, &argp->data);
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
flags = DB_MPOOL_DIRTY;
- } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op))
- || (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
+ } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
+ (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
/* Need to undo a put or redo a delete. */
__ham_dpair(file_dbp, pagep, argp->ndx);
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
/* Return the previous LSN. */
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: if (getmeta)
- (void)__ham_release_meta(dbc);
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
REC_CLOSE;
}
@@ -194,15 +188,16 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info)
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- int cmp_n, cmp_p, flags, getmeta, ret;
+ u_int32_t flags;
+ int cmp_n, cmp_p, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
- getmeta = 0;
REC_PRINT(__ham_newpage_print);
REC_INTRO(__ham_newpage_read, 1);
- if ((ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->new_pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -212,15 +207,11 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info)
*/
ret = 0;
goto ppage;
- } else if ((ret = memp_fget(mpf, &argp->new_pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->new_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- getmeta = 1;
-
/*
* There are potentially three pages we need to check: the one
* that we created/deleted, the one before it and the one after
@@ -250,12 +241,13 @@ __ham_newpage_recover(dbenv, dbtp, lsnp, op, info)
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
/* Now do the prev page. */
ppage: if (argp->prev_pgno != PGNO_INVALID) {
- if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->prev_pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist.
@@ -265,9 +257,8 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) {
*/
ret = 0;
goto npage;
- } else if ((ret =
- memp_fget(mpf, &argp->prev_pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->prev_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
@@ -281,7 +272,8 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) {
/* Redo a create new page or undo a delete new page. */
pagep->next_pgno = argp->new_pgno;
flags = DB_MPOOL_DIRTY;
- } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
+ } else if ((cmp_p == 0 &&
+ DB_REDO(op) && argp->opcode == DELOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->next_pgno = argp->next_pgno;
@@ -291,13 +283,14 @@ ppage: if (argp->prev_pgno != PGNO_INVALID) {
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
}
/* Now time to do the next page */
npage: if (argp->next_pgno != PGNO_INVALID) {
- if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist.
@@ -306,9 +299,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) {
* this case, don't bother creating a page.
*/
goto done;
- } else if ((ret =
- memp_fget(mpf, &argp->next_pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
@@ -322,7 +314,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) {
/* Redo a create new page or undo a delete new page. */
pagep->prev_pgno = argp->new_pgno;
flags = DB_MPOOL_DIRTY;
- } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
+ } else if ((cmp_p == 0 &&
+ DB_REDO(op) && argp->opcode == DELOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->prev_pgno = argp->prev_pgno;
@@ -332,14 +325,15 @@ npage: if (argp->next_pgno != PGNO_INVALID) {
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
}
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: if (getmeta)
- (void)__ham_release_meta(dbc);
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
REC_CLOSE;
}
@@ -366,17 +360,18 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info)
DB_MPOOLFILE *mpf;
DBT dbt;
PAGE *pagep;
+ u_int32_t flags;
int32_t grow;
- int cmp_n, cmp_p, flags, getmeta, ret;
+ int cmp_n, cmp_p, ret;
u_int8_t *hk;
+ pagep = NULL;
COMPQUIET(info, NULL);
- getmeta = 0;
REC_PRINT(__ham_replace_print);
REC_INTRO(__ham_replace_read, 1);
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -385,15 +380,11 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info)
* don't bother creating a page.
*/
goto done;
- } else if ((ret = memp_fget(mpf, &argp->pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- getmeta = 1;
-
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
@@ -419,10 +410,10 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info)
}
if (flags) {
- __ham_onpage_replace(pagep,
- file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
+ __ham_onpage_replace(file_dbp, pagep,
+ argp->ndx, argp->off, grow, &dbt);
if (argp->makedup) {
- hk = P_ENTRY(pagep, argp->ndx);
+ hk = P_ENTRY(file_dbp, pagep, argp->ndx);
if (DB_REDO(op))
HPAGE_PTYPE(hk) = H_DUPLICATE;
else
@@ -430,14 +421,15 @@ __ham_replace_recover(dbenv, dbtp, lsnp, op, info)
}
}
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: if (getmeta)
- (void)__ham_release_meta(dbc);
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
REC_CLOSE;
}
@@ -460,15 +452,16 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info)
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- int cmp_n, cmp_p, flags, getmeta, ret;
+ u_int32_t flags;
+ int cmp_n, cmp_p, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
- getmeta = 0;
REC_PRINT(__ham_splitdata_print);
REC_INTRO(__ham_splitdata_read, 1);
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -477,15 +470,11 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info)
* don't bother creating a page.
*/
goto done;
- } else if ((ret = memp_fget(mpf, &argp->pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- getmeta = 1;
-
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn);
@@ -519,14 +508,15 @@ __ham_splitdata_recover(dbenv, dbtp, lsnp, op, info)
LSN(pagep) = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(file_dbp->mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: if (getmeta)
- (void)__ham_release_meta(dbc);
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
REC_CLOSE;
}
@@ -550,21 +540,19 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info)
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- int cmp_n, cmp_p, flags, getmeta, ret;
+ u_int32_t flags;
+ int cmp_n, cmp_p, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
- getmeta = 0;
REC_PRINT(__ham_copypage_print);
REC_INTRO(__ham_copypage_read, 1);
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- getmeta = 1;
flags = 0;
/* This is the bucket page. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -574,8 +562,8 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info)
*/
ret = 0;
goto donext;
- } else if ((ret = memp_fget(mpf, &argp->pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
@@ -597,11 +585,12 @@ __ham_copypage_recover(dbenv, dbtp, lsnp, op, info)
LSN(pagep) = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
donext: /* Now fix up the "next" page. */
- if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -611,8 +600,8 @@ donext: /* Now fix up the "next" page. */
*/
ret = 0;
goto do_nn;
- } else if ((ret = memp_fget(mpf, &argp->next_pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
@@ -629,14 +618,15 @@ donext: /* Now fix up the "next" page. */
memcpy(pagep, argp->page.data, argp->page.size);
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
/* Now fix up the next's next page. */
do_nn: if (argp->nnext_pgno == PGNO_INVALID)
goto done;
- if ((ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
/*
* We are undoing and the page doesn't exist. That
@@ -645,8 +635,8 @@ do_nn: if (argp->nnext_pgno == PGNO_INVALID)
* don't bother creating a page.
*/
goto done;
- } else if ((ret = memp_fget(mpf, &argp->nnext_pgno,
- DB_MPOOL_CREATE, &pagep)) != 0)
+ } else if ((ret = mpf->get(mpf,
+ &argp->nnext_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
@@ -666,14 +656,15 @@ do_nn: if (argp->nnext_pgno == PGNO_INVALID)
LSN(pagep) = argp->nnextlsn;
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: if (getmeta)
- (void)__ham_release_meta(dbc);
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
REC_CLOSE;
}
@@ -695,13 +686,17 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
__ham_metagroup_args *argp;
HASH_CURSOR *hcp;
DB *file_dbp;
+ DBMETA *mmeta;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- db_pgno_t last_pgno;
- int cmp_n, cmp_p, flags, groupgrow, ret;
+ db_pgno_t pgno;
+ u_int32_t flags, mmeta_flags;
+ int cmp_n, cmp_p, did_recover, groupgrow, ret;
COMPQUIET(info, NULL);
+ mmeta_flags = 0;
+ mmeta = NULL;
REC_PRINT(__ham_metagroup_print);
REC_INTRO(__ham_metagroup_read, 1);
@@ -709,22 +704,24 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
* This logs the virtual create of pages pgno to pgno + bucket
* Since the mpool page-allocation is not really able to be
* transaction protected, we can never undo it. Even in an abort,
- * we have to allocate these pages to the hash table.
+ * we have to allocate these pages to the hash table if they
+ * were actually created. In particular, during disaster
+ * recovery the metapage may be before this point if we
+ * are rolling backward. If the file has not been extended
+ * then the metapage could not have been updated.
* The log record contains:
* bucket: new bucket being allocated.
* pgno: page number of the new bucket.
* if bucket is a power of 2, then we allocated a whole batch of
* pages; if it's not, then we simply allocated one new page.
*/
- groupgrow =
- (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
+ groupgrow = (u_int32_t)(1 << __db_log2(argp->bucket + 1)) ==
+ argp->bucket + 1;
+ pgno = argp->pgno;
+ if (argp->newalloc)
+ pgno += argp->bucket;
- last_pgno = argp->pgno;
- if (groupgrow)
- /* Read the last page. */
- last_pgno += argp->bucket;
-
- if ((ret = memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
@@ -743,7 +740,7 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
pagep->lsn = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
- if ((ret = memp_fput(mpf, pagep, flags)) != 0)
+ if ((ret = mpf->put(mpf, pagep, flags)) != 0)
goto out;
/* Now we have to update the meta-data page. */
@@ -753,39 +750,90 @@ __ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn);
cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
CHECK_LSN(op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
- if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) {
- if (DB_REDO(op)) {
- /* Redo the actual updating of bucket counts. */
- ++hcp->hdr->max_bucket;
- if (groupgrow) {
- hcp->hdr->low_mask = hcp->hdr->high_mask;
- hcp->hdr->high_mask =
- (argp->bucket + 1) | hcp->hdr->low_mask;
- }
- hcp->hdr->dbmeta.lsn = *lsnp;
- } else {
- /* Undo the actual updating of bucket counts. */
- --hcp->hdr->max_bucket;
- if (groupgrow) {
- hcp->hdr->high_mask = hcp->hdr->low_mask;
- hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
- }
- hcp->hdr->dbmeta.lsn = argp->metalsn;
+ did_recover = 0;
+ if (cmp_p == 0 && DB_REDO(op)) {
+ /* Redo the actual updating of bucket counts. */
+ ++hcp->hdr->max_bucket;
+ if (groupgrow) {
+ hcp->hdr->low_mask = hcp->hdr->high_mask;
+ hcp->hdr->high_mask =
+ (argp->bucket + 1) | hcp->hdr->low_mask;
}
- if (groupgrow &&
- hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] ==
- PGNO_INVALID)
- hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
- argp->pgno - argp->bucket - 1;
- F_SET(hcp, H_DIRTY);
+ hcp->hdr->dbmeta.lsn = *lsnp;
+ did_recover = 1;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ /* Undo the actual updating of bucket counts. */
+ --hcp->hdr->max_bucket;
+ if (groupgrow) {
+ hcp->hdr->high_mask = hcp->hdr->low_mask;
+ hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
+ }
+ hcp->hdr->dbmeta.lsn = argp->metalsn;
+ did_recover = 1;
+ }
+
+ /*
+ * Now we need to fix up the spares array. Each entry in the
+ * spares array indicates the beginning page number for the
+ * indicated doubling. We need to fill this in whenever the
+ * spares array is invalid, since we never reclaim pages from
+ * the spares array and we have to allocate the pages to the
+ * spares array in both the redo and undo cases.
+ */
+ if (argp->newalloc &&
+ hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) {
+ hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
+ argp->pgno - argp->bucket - 1;
+ did_recover = 1;
+ }
+
+ /*
+ * Finally, we need to potentially fix up the last_pgno field
+ * in the master meta-data page (which may or may not be the
+ * same as the hash header page).
+ */
+ if (argp->mmpgno != argp->mpgno) {
+ if ((ret =
+ mpf->get(mpf, &argp->mmpgno, 0, (PAGE **)&mmeta)) != 0)
+ goto out;
+ mmeta_flags = 0;
+ cmp_n = log_compare(lsnp, &mmeta->lsn);
+ cmp_p = log_compare(&mmeta->lsn, &argp->mmetalsn);
+ if (cmp_p == 0 && DB_REDO(op)) {
+ mmeta->lsn = *lsnp;
+ mmeta_flags = DB_MPOOL_DIRTY;
+ } else if (cmp_n == 0 && DB_UNDO(op)) {
+ mmeta->lsn = argp->mmetalsn;
+ mmeta_flags = DB_MPOOL_DIRTY;
+ }
+ } else
+ mmeta = (DBMETA *)hcp->hdr;
+
+ if (argp->newalloc) {
+ if (mmeta->last_pgno < pgno)
+ mmeta->last_pgno = pgno;
+ mmeta_flags = DB_MPOOL_DIRTY;
}
- if ((ret = __ham_release_meta(dbc)) != 0)
+
+ if (argp->mmpgno != argp->mpgno &&
+ (ret = mpf->put(mpf, mmeta, mmeta_flags)) != 0)
goto out;
+ mmeta = NULL;
+
+ if (did_recover)
+ F_SET(hcp, H_DIRTY);
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (mmeta != NULL)
+ (void)mpf->put(mpf, mmeta, 0);
+ if (dbc != NULL)
+ (void)__ham_release_meta(dbc);
+ if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC)
+ ret = 0;
+
+ REC_CLOSE;
}
/*
@@ -808,17 +856,20 @@ __ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info)
DB_MPOOLFILE *mpf;
DB *file_dbp;
DBC *dbc;
+ PAGE *pagep;
db_pgno_t pgno;
- int cmp_n, cmp_p, flags, ret;
+ int cmp_n, cmp_p, modified, ret;
+ mmeta = NULL;
+ modified = 0;
REC_PRINT(__ham_groupalloc_print);
REC_INTRO(__ham_groupalloc_read, 0);
pgno = PGNO_BASE_MD;
- if ((ret = memp_fget(mpf, &pgno, 0, &mmeta)) != 0) {
+ if ((ret = mpf->get(mpf, &pgno, 0, &mmeta)) != 0) {
if (DB_REDO(op)) {
/* Page should have existed. */
- (void)__db_pgerr(file_dbp, pgno);
+ __db_pgerr(file_dbp, pgno, ret);
goto out;
} else {
ret = 0;
@@ -839,37 +890,48 @@ __ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info)
* that the pages were never allocated, so we'd better check for
* that and handle it here.
*/
-
- flags = 0;
if (DB_REDO(op)) {
- if ((ret = __ham_alloc_pages(file_dbp, argp)) != 0)
- goto out1;
+ if ((ret = __ham_alloc_pages(file_dbp, argp, lsnp)) != 0)
+ goto out;
if (cmp_p == 0) {
LSN(mmeta) = *lsnp;
- flags = DB_MPOOL_DIRTY;
+ modified = 1;
}
- }
+ } else if (DB_UNDO(op)) {
+ /*
+ * Reset the last page back to its preallocation state.
+ */
+ pgno = argp->start_pgno + argp->num - 1;
+ if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) {
- /*
- * Always put the pages into the limbo list and free them later.
- */
- else if (DB_UNDO(op)) {
+ if (log_compare(&pagep->lsn, lsnp) == 0)
+ ZERO_LSN(pagep->lsn);
+
+ if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
+ goto out;
+ } else if (ret != DB_PAGE_NOTFOUND)
+ goto out;
+ /*
+ * Always put the pages into the limbo list and free them later.
+ */
if ((ret = __db_add_limbo(dbenv,
info, argp->fileid, argp->start_pgno, argp->num)) != 0)
goto out;
if (cmp_n == 0) {
LSN(mmeta) = argp->meta_lsn;
- flags = DB_MPOOL_DIRTY;
+ modified = 1;
}
}
-out1: if ((ret = memp_fput(mpf, mmeta, flags)) != 0)
- goto out;
-
done: if (ret == 0)
*lsnp = argp->prev_lsn;
-out: REC_CLOSE;
+out: if (mmeta != NULL)
+ (void)mpf->put(mpf, mmeta, modified ? DB_MPOOL_DIRTY : 0);
+
+ if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC)
+ ret = 0;
+ REC_CLOSE;
}
/*
@@ -883,9 +945,10 @@ out: REC_CLOSE;
* Hash normally has holes in its files and handles them appropriately.
*/
static int
-__ham_alloc_pages(dbp, argp)
+__ham_alloc_pages(dbp, argp, lsnp)
DB *dbp;
__ham_groupalloc_args *argp;
+ DB_LSN *lsnp;
{
DB_MPOOLFILE *mpf;
PAGE *pagep;
@@ -898,38 +961,26 @@ __ham_alloc_pages(dbp, argp)
pgno = argp->start_pgno + argp->num - 1;
/* If the page exists, and it has been initialized, then we're done. */
- if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) == 0) {
- if ((pagep->type == P_INVALID) && IS_ZERO_LSN(pagep->lsn))
+ if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) {
+ if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
goto reinit_page;
- if ((ret = memp_fput(mpf, pagep, 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, 0)) != 0)
return (ret);
return (0);
}
- /*
- * Had to create the page. On some systems (read "Windows"),
- * you can find random garbage on pages to which you haven't
- * yet written. So, we have an os layer that will do the
- * right thing for group allocations. We call that directly
- * to make sure all the pages are allocated and then continue
- * merrily on our way with normal recovery.
- */
- if ((ret = __os_fpinit(dbp->dbenv, &mpf->fh,
- argp->start_pgno, argp->num, dbp->pgsize)) != 0)
- return (ret);
-
- if ((ret = memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
- (void)__db_pgerr(dbp, pgno);
+ /* Had to create the page. */
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
+ __db_pgerr(dbp, pgno, ret);
return (ret);
}
reinit_page:
/* Initialize the newly allocated page. */
- P_INIT(pagep,
- dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
- ZERO_LSN(pagep->lsn);
+ P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ pagep->lsn = *lsnp;
- if ((ret = memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
@@ -942,7 +993,6 @@ reinit_page:
* PUBLIC: int __ham_curadj_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
-
int
__ham_curadj_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
@@ -958,14 +1008,13 @@ __ham_curadj_recover(dbenv, dbtp, lsnp, op, info)
int ret;
HASH_CURSOR *hcp;
- REC_PRINT(__ham_groupalloc_print);
+ COMPQUIET(info, NULL);
+ REC_PRINT(__ham_curadj_print);
+ REC_INTRO(__ham_curadj_read, 0);
- ret = 0;
if (op != DB_TXN_ABORT)
goto done;
- REC_INTRO(__ham_curadj_read, 0);
- COMPQUIET(info, NULL);
/*
* Undo the adjustment by reinitializing the the cursor
* to look like the one that was used to do the adustment,
@@ -991,7 +1040,6 @@ out: REC_CLOSE;
* PUBLIC: int __ham_chgpg_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
-
int
__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
@@ -1008,15 +1056,18 @@ __ham_chgpg_recover(dbenv, dbtp, lsnp, op, info)
int ret;
DBC *cp;
HASH_CURSOR *lcp;
+ u_int32_t order, indx;
+ COMPQUIET(info, NULL);
REC_PRINT(__ham_chgpg_print);
+ REC_INTRO(__ham_chgpg_read, 0);
- ret = 0;
if (op != DB_TXN_ABORT)
- goto out;
- REC_INTRO(__ham_chgpg_read, 0);
+ goto done;
- COMPQUIET(info, NULL);
+ /* Overloaded fields for DB_HAM_DEL*PG */
+ indx = argp->old_indx;
+ order = argp->new_indx;
MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid);
@@ -1029,50 +1080,77 @@ __ham_chgpg_recover(dbenv, dbtp, lsnp, op, info)
lcp = (HASH_CURSOR *)cp->internal;
switch (argp->mode) {
- case DB_HAM_CHGPG:
+ case DB_HAM_DELFIRSTPG:
if (lcp->pgno != argp->new_pgno)
break;
-
- if (argp->old_indx == NDX_INVALID)
+ if (lcp->indx != indx ||
+ !F_ISSET(lcp, H_DELETED) ||
+ lcp->order >= order) {
lcp->pgno = argp->old_pgno;
- else if (lcp->indx == argp->new_indx) {
- lcp->indx = argp->old_indx;
+ if (lcp->indx == indx)
+ lcp->order -= order;
+ }
+ break;
+ case DB_HAM_DELMIDPG:
+ case DB_HAM_DELLASTPG:
+ if (lcp->pgno == argp->new_pgno &&
+ lcp->indx == indx &&
+ F_ISSET(lcp, H_DELETED) &&
+ lcp->order >= order) {
lcp->pgno = argp->old_pgno;
+ lcp->order -= order;
+ lcp->indx = 0;
}
break;
-
+ case DB_HAM_CHGPG:
+ /*
+ * If we're doing a CHGPG, we're undoing
+ * the move of a non-deleted item to a
+ * new page. Any cursors with the deleted
+ * flag set do not belong to this item;
+ * don't touch them.
+ */
+ if (F_ISSET(lcp, H_DELETED))
+ break;
+ /* FALLTHROUGH */
case DB_HAM_SPLIT:
- if (lcp->pgno == argp->new_pgno
- && lcp->indx == argp->new_indx) {
+ if (lcp->pgno == argp->new_pgno &&
+ lcp->indx == argp->new_indx) {
lcp->indx = argp->old_indx;
lcp->pgno = argp->old_pgno;
}
break;
-
case DB_HAM_DUP:
- if (lcp->opd != NULL) {
- opdcp =
- (BTREE_CURSOR *)lcp->opd->internal;
- if (opdcp->pgno == argp->new_pgno &&
- opdcp->indx == argp->new_indx) {
- if (F_ISSET(opdcp, C_DELETED))
- F_SET(lcp, H_DELETED);
- if ((ret =
- lcp->opd->c_close(
- lcp->opd)) != 0)
- goto out;
- lcp->opd = NULL;
- }
- }
+ if (lcp->opd == NULL)
+ break;
+ opdcp = (BTREE_CURSOR *)lcp->opd->internal;
+ if (opdcp->pgno != argp->new_pgno ||
+ opdcp->indx != argp->new_indx)
+ break;
+
+ if (F_ISSET(opdcp, C_DELETED))
+ F_SET(lcp, H_DELETED);
+ /*
+ * We can't close a cursor while we have the
+ * dbp mutex locked, since c_close reacquires
+ * it. It should be safe to drop the mutex
+ * here, though, since newly opened cursors
+ * are put only at the end of the tailq and
+ * the cursor we're adjusting can't be closed
+ * under us.
+ */
+ MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp);
+ if ((ret = lcp->opd->c_close(lcp->opd)) != 0)
+ goto out;
+ MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp);
+ lcp->opd = NULL;
break;
}
}
-
MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp);
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
done: *lsnp = argp->prev_lsn;
- ret = 0;
out: REC_CLOSE;
}
diff --git a/bdb/hash/hash_reclaim.c b/bdb/hash/hash_reclaim.c
index 8857c5406a4..ac90ffff08a 100644
--- a/bdb/hash/hash_reclaim.c
+++ b/bdb/hash/hash_reclaim.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_reclaim.c,v 11.4 2000/11/30 00:58:37 ubell Exp $";
+static const char revid[] = "$Id: hash_reclaim.c,v 11.12 2002/03/28 19:49:43 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,10 +18,8 @@ static const char revid[] = "$Id: hash_reclaim.c,v 11.4 2000/11/30 00:58:37 ubel
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "hash.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
/*
* __ham_reclaim --
@@ -52,8 +50,8 @@ __ham_reclaim(dbp, txn)
if ((ret = __ham_get_meta(dbc)) != 0)
goto err;
- if ((ret = __ham_traverse(dbp,
- dbc, DB_LOCK_WRITE, __db_reclaim_callback, dbc)) != 0)
+ if ((ret = __ham_traverse(dbc,
+ DB_LOCK_WRITE, __db_reclaim_callback, dbc, 1)) != 0)
goto err;
if ((ret = dbc->c_close(dbc)) != 0)
goto err;
@@ -66,3 +64,48 @@ err: if (hcp->hdr != NULL)
(void)dbc->c_close(dbc);
return (ret);
}
+
+/*
+ * __ham_truncate --
+ * Reclaim the pages from a subdatabase and return them to the
+ * parent free list.
+ *
+ * PUBLIC: int __ham_truncate __P((DB *, DB_TXN *txn, u_int32_t *));
+ */
+int
+__ham_truncate(dbp, txn, countp)
+ DB *dbp;
+ DB_TXN *txn;
+ u_int32_t *countp;
+{
+ DBC *dbc;
+ HASH_CURSOR *hcp;
+ db_trunc_param trunc;
+ int ret;
+
+ /* Open up a cursor that we'll use for traversing. */
+ if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+ return (ret);
+ hcp = (HASH_CURSOR *)dbc->internal;
+
+ if ((ret = __ham_get_meta(dbc)) != 0)
+ goto err;
+
+ trunc.count = 0;
+ trunc.dbc = dbc;
+
+ if ((ret = __ham_traverse(dbc,
+ DB_LOCK_WRITE, __db_truncate_callback, &trunc, 1)) != 0)
+ goto err;
+ if ((ret = __ham_release_meta(dbc)) != 0)
+ goto err;
+ if ((ret = dbc->c_close(dbc)) != 0)
+ goto err;
+ *countp = trunc.count;
+ return (0);
+
+err: if (hcp->hdr != NULL)
+ (void)__ham_release_meta(dbc);
+ (void)dbc->c_close(dbc);
+ return (ret);
+}
diff --git a/bdb/hash/hash_stat.c b/bdb/hash/hash_stat.c
index ed64bbc68bd..f9ee1d099cb 100644
--- a/bdb/hash/hash_stat.c
+++ b/bdb/hash/hash_stat.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_stat.c,v 11.24 2000/12/21 21:54:35 margo Exp $";
+static const char revid[] = "$Id: hash_stat.c,v 11.48 2002/08/06 06:11:28 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,11 +18,9 @@ static const char revid[] = "$Id: hash_stat.c,v 11.24 2000/12/21 21:54:35 margo
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "hash.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
static int __ham_stat_callback __P((DB *, PAGE *, void *, int *));
@@ -30,24 +28,29 @@ static int __ham_stat_callback __P((DB *, PAGE *, void *, int *));
* __ham_stat --
* Gather/print the hash statistics
*
- * PUBLIC: int __ham_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
+ * PUBLIC: int __ham_stat __P((DB *, void *, u_int32_t));
*/
int
-__ham_stat(dbp, spp, db_malloc, flags)
+__ham_stat(dbp, spp, flags)
DB *dbp;
- void *spp, *(*db_malloc) __P((size_t));
+ void *spp;
u_int32_t flags;
{
+ DBC *dbc;
+ DB_ENV *dbenv;
DB_HASH_STAT *sp;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
- DBC *dbc;
PAGE *h;
db_pgno_t pgno;
int ret;
- PANIC_CHECK(dbp->dbenv);
+ dbenv = dbp->dbenv;
+
+ PANIC_CHECK(dbenv);
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat");
+ mpf = dbp->mpf;
sp = NULL;
/* Check for invalid flags. */
@@ -62,39 +65,39 @@ __ham_stat(dbp, spp, db_malloc, flags)
goto err;
/* Allocate and clear the structure. */
- if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0)
+ if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0)
goto err;
memset(sp, 0, sizeof(*sp));
- if (flags == DB_CACHED_COUNTS) {
- sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
- sp->hash_ndata = hcp->hdr->dbmeta.record_count;
- goto done;
- }
-
/* Copy the fields that we have. */
+ sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
+ sp->hash_ndata = hcp->hdr->dbmeta.record_count;
sp->hash_pagesize = dbp->pgsize;
sp->hash_buckets = hcp->hdr->max_bucket + 1;
sp->hash_magic = hcp->hdr->dbmeta.magic;
sp->hash_version = hcp->hdr->dbmeta.version;
sp->hash_metaflags = hcp->hdr->dbmeta.flags;
- sp->hash_nelem = hcp->hdr->nelem;
sp->hash_ffactor = hcp->hdr->ffactor;
+ if (flags == DB_FAST_STAT || flags == DB_CACHED_COUNTS)
+ goto done;
+
/* Walk the free list, counting pages. */
for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free;
pgno != PGNO_INVALID;) {
++sp->hash_free;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
pgno = h->next_pgno;
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
}
/* Now traverse the rest of the table. */
- if ((ret = __ham_traverse(dbp,
- dbc, DB_LOCK_READ, __ham_stat_callback, sp)) != 0)
+ sp->hash_nkeys = 0;
+ sp->hash_ndata = 0;
+ if ((ret = __ham_traverse(dbc,
+ DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0)
goto err;
if (!F_ISSET(dbp, DB_AM_RDONLY)) {
@@ -114,7 +117,7 @@ done:
return (0);
err: if (sp != NULL)
- __os_free(sp, sizeof(*sp));
+ __os_ufree(dbenv, sp);
if (hcp->hdr != NULL)
(void)__ham_release_meta(dbc);
(void)dbc->c_close(dbc);
@@ -127,26 +130,30 @@ err: if (sp != NULL)
* Traverse an entire hash table. We use the callback so that we
* can use this both for stat collection and for deallocation.
*
- * PUBLIC: int __ham_traverse __P((DB *, DBC *, db_lockmode_t,
- * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *));
+ * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t,
+ * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *, int));
*/
int
-__ham_traverse(dbp, dbc, mode, callback, cookie)
- DB *dbp;
+__ham_traverse(dbc, mode, callback, cookie, look_past_max)
DBC *dbc;
db_lockmode_t mode;
int (*callback) __P((DB *, PAGE *, void *, int *));
void *cookie;
+ int look_past_max;
{
+ DB *dbp;
+ DBC *opd;
+ DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
HKEYDATA *hk;
- DBC *opd;
db_pgno_t pgno, opgno;
- u_int32_t bucket;
int did_put, i, ret, t_ret;
+ u_int32_t bucket, spares_entry;
- hcp = (HASH_CURSOR *)dbc->internal;
+ dbp = dbc->dbp;
opd = NULL;
+ mpf = dbp->mpf;
+ hcp = (HASH_CURSOR *)dbc->internal;
ret = 0;
/*
@@ -156,12 +163,47 @@ __ham_traverse(dbp, dbc, mode, callback, cookie)
* locking easy, makes this a pain in the butt. We have to traverse
* duplicate, overflow and big pages from the bucket so that we
* don't access anything that isn't properly locked.
+ *
*/
- for (bucket = 0; bucket <= hcp->hdr->max_bucket; bucket++) {
+ for (bucket = 0;; bucket++) {
+ /*
+ * We put the loop exit condition check here, because
+ * it made for a really vile extended ?: that made SCO's
+ * compiler drop core.
+ *
+ * If look_past_max is not set, we can stop at max_bucket;
+ * if it is set, we need to include pages that are part of
+ * the current doubling but beyond the highest bucket we've
+ * split into, as well as pages from a "future" doubling
+ * that may have been created within an aborted
+ * transaction. To do this, keep looping (and incrementing
+ * bucket) until the corresponding spares array entries
+ * cease to be defined.
+ */
+ if (look_past_max) {
+ spares_entry = __db_log2(bucket + 1);
+ if (spares_entry >= NCACHED ||
+ hcp->hdr->spares[spares_entry] == 0)
+ break;
+ } else {
+ if (bucket > hcp->hdr->max_bucket)
+ break;
+ }
+
hcp->bucket = bucket;
hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket);
for (ret = __ham_get_cpage(dbc, mode); ret == 0;
ret = __ham_next_cpage(dbc, pgno, 0)) {
+
+ /*
+ * If we are cleaning up pages past the max_bucket,
+ * then they may be on the free list and have their
+ * next pointers set, but the should be ignored. In
+ * fact, we really ought to just skip anybody who is
+ * not a valid page.
+ */
+ if (TYPE(hcp->page) == P_INVALID)
+ break;
pgno = NEXT_PGNO(hcp->page);
/*
@@ -171,17 +213,17 @@ __ham_traverse(dbp, dbc, mode, callback, cookie)
* case we have to count those pages).
*/
for (i = 0; i < NUM_ENT(hcp->page); i++) {
- hk = (HKEYDATA *)P_ENTRY(hcp->page, i);
+ hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i);
switch (HPAGE_PTYPE(hk)) {
case H_OFFDUP:
memcpy(&opgno, HOFFDUP_PGNO(hk),
sizeof(db_pgno_t));
if ((ret = __db_c_newopd(dbc,
- opgno, &opd)) != 0)
+ opgno, NULL, &opd)) != 0)
return (ret);
if ((ret = __bam_traverse(opd,
DB_LOCK_READ, opgno,
- __ham_stat_callback, cookie))
+ callback, cookie))
!= 0)
goto err;
if ((ret = opd->c_close(opd)) != 0)
@@ -221,10 +263,10 @@ __ham_traverse(dbp, dbc, mode, callback, cookie)
goto err;
if (STD_LOCKING(dbc))
- (void)lock_put(dbp->dbenv, &hcp->lock);
+ (void)dbp->dbenv->lock_put(dbp->dbenv, &hcp->lock);
if (hcp->page != NULL) {
- if ((ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0)
+ if ((ret = mpf->put(mpf, hcp->page, 0)) != 0)
return (ret);
hcp->page = NULL;
}
@@ -247,6 +289,7 @@ __ham_stat_callback(dbp, pagep, cookie, putp)
DB_BTREE_STAT bstat;
db_indx_t indx, len, off, tlen, top;
u_int8_t *hk;
+ int ret;
*putp = 0;
sp = cookie;
@@ -266,15 +309,15 @@ __ham_stat_callback(dbp, pagep, cookie, putp)
* is a bucket.
*/
if (PREV_PGNO(pagep) == PGNO_INVALID)
- sp->hash_bfree += P_FREESPACE(pagep);
+ sp->hash_bfree += P_FREESPACE(dbp, pagep);
else {
sp->hash_overflows++;
- sp->hash_ovfl_free += P_FREESPACE(pagep);
+ sp->hash_ovfl_free += P_FREESPACE(dbp, pagep);
}
top = NUM_ENT(pagep);
/* Correct for on-page duplicates and deleted items. */
for (indx = 0; indx < top; indx += P_INDX) {
- switch (*H_PAIRDATA(pagep, indx)) {
+ switch (*H_PAIRDATA(dbp, pagep, indx)) {
case H_OFFDUP:
case H_OFFPAGE:
break;
@@ -282,8 +325,8 @@ __ham_stat_callback(dbp, pagep, cookie, putp)
sp->hash_ndata++;
break;
case H_DUPLICATE:
- tlen = LEN_HDATA(pagep, 0, indx);
- hk = H_PAIRDATA(pagep, indx);
+ tlen = LEN_HDATA(dbp, pagep, 0, indx);
+ hk = H_PAIRDATA(dbp, pagep, indx);
for (off = 0; off < tlen;
off += len + 2 * sizeof (db_indx_t)) {
sp->hash_ndata++;
@@ -310,7 +353,8 @@ __ham_stat_callback(dbp, pagep, cookie, putp)
bstat.bt_int_pgfree = 0;
bstat.bt_leaf_pgfree = 0;
bstat.bt_ndata = 0;
- __bam_stat_callback(dbp, pagep, &bstat, putp);
+ if ((ret = __bam_stat_callback(dbp, pagep, &bstat, putp)) != 0)
+ return (ret);
sp->hash_dup++;
sp->hash_dup_free += bstat.bt_leaf_pgfree +
bstat.bt_dup_pgfree + bstat.bt_int_pgfree;
@@ -318,11 +362,10 @@ __ham_stat_callback(dbp, pagep, cookie, putp)
break;
case P_OVERFLOW:
sp->hash_bigpages++;
- sp->hash_big_bfree += P_OVFLSPACE(dbp->pgsize, pagep);
+ sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep);
break;
default:
- return (__db_unknown_type(dbp->dbenv,
- "__ham_stat_callback", pagep->type));
+ return (__db_pgfmt(dbp->dbenv, pagep->pgno));
}
return (0);
diff --git a/bdb/hash/hash_upgrade.c b/bdb/hash/hash_upgrade.c
index c34381276b4..2dd21d7b644 100644
--- a/bdb/hash/hash_upgrade.c
+++ b/bdb/hash/hash_upgrade.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_upgrade.c,v 11.25 2000/12/14 19:18:32 bostic Exp $";
+static const char revid[] = "$Id: hash_upgrade.c,v 11.32 2002/08/06 05:34:58 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,14 +18,13 @@ static const char revid[] = "$Id: hash_upgrade.c,v 11.25 2000/12/14 19:18:32 bos
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "hash.h"
-#include "db_upgrade.h"
+#include "dbinc/db_page.h"
+#include "dbinc/hash.h"
+#include "dbinc/db_upgrade.h"
/*
* __ham_30_hashmeta --
- * Upgrade the database from version 4/5 to version 6.
+ * Upgrade the database from version 4/5 to version 6.
*
* PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *));
*/
@@ -163,10 +162,6 @@ __ham_30_sizefix(dbp, fhp, realname, metabuf)
return (ret);
if ((ret = __os_write(dbenv, fhp, buf, pagesize, &nw)) != 0)
return (ret);
- if (nw != pagesize) {
- __db_err(dbenv, "Short write during upgrade");
- return (EIO);
- }
}
return (0);
@@ -174,7 +169,7 @@ __ham_30_sizefix(dbp, fhp, realname, metabuf)
/*
* __ham_31_hashmeta --
- * Upgrade the database from version 6 to version 7.
+ * Upgrade the database from version 6 to version 7.
*
* PUBLIC: int __ham_31_hashmeta
* PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
@@ -229,7 +224,7 @@ __ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
/*
* __ham_31_hash --
- * Upgrade the database hash leaf pages.
+ * Upgrade the database hash leaf pages.
*
* PUBLIC: int __ham_31_hash
* PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
@@ -252,7 +247,7 @@ __ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp)
ret = 0;
for (indx = 0; indx < NUM_ENT(h); indx += 2) {
- hk = (HKEYDATA *)H_PAIRDATA(h, indx);
+ hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx);
if (HPAGE_PTYPE(hk) == H_OFFDUP) {
memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t));
tpgno = pgno;
diff --git a/bdb/hash/hash_verify.c b/bdb/hash/hash_verify.c
index 31dd7cc2299..e6f5a2b0d65 100644
--- a/bdb/hash/hash_verify.c
+++ b/bdb/hash/hash_verify.c
@@ -1,16 +1,16 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2002
* Sleepycat Software. All rights reserved.
*
- * $Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $
+ * $Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell Exp $";
+static const char revid[] = "$Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -20,10 +20,10 @@ static const char revid[] = "$Id: hash_verify.c,v 1.31 2000/11/30 00:58:37 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_verify.h"
-#include "btree.h"
-#include "hash.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/btree.h"
+#include "dbinc/hash.h"
static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t));
static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t,
@@ -83,8 +83,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
if (!LF_ISSET(DB_NOORDERCHK))
if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) {
EPRINT((dbp->dbenv,
-"Database has different custom hash function; reverify with DB_NOORDERCHK set"
- ));
+"Page %lu: database has different custom hash function; reverify with DB_NOORDERCHK set",
+ (u_long)pgno));
/*
* Return immediately; this is probably a sign
* of user error rather than database corruption, so
@@ -97,8 +97,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
/* max_bucket must be less than the last pgno. */
if (m->max_bucket > vdp->last_pgno) {
EPRINT((dbp->dbenv,
- "Impossible max_bucket %lu on meta page %lu",
- m->max_bucket, pgno));
+ "Page %lu: Impossible max_bucket %lu on meta page",
+ (u_long)pgno, (u_long)m->max_bucket));
/*
* Most other fields depend somehow on max_bucket, so
* we just return--there will be lots of extraneous
@@ -118,15 +118,15 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1);
if (m->high_mask != pwr - 1) {
EPRINT((dbp->dbenv,
- "Incorrect high_mask %lu on page %lu, should be %lu",
- m->high_mask, pgno, pwr - 1));
+ "Page %lu: incorrect high_mask %lu, should be %lu",
+ (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1));
isbad = 1;
}
pwr >>= 1;
if (m->low_mask != pwr - 1) {
EPRINT((dbp->dbenv,
- "Incorrect low_mask %lu on page %lu, should be %lu",
- m->low_mask, pgno, pwr - 1));
+ "Page %lu: incorrect low_mask %lu, should be %lu",
+ (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1));
isbad = 1;
}
@@ -140,8 +140,8 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
*/
if (m->nelem > 0x80000000) {
EPRINT((dbp->dbenv,
- "Suspiciously high nelem of %lu on page %lu",
- m->nelem, pgno));
+ "Page %lu: suspiciously high nelem of %lu",
+ (u_long)pgno, (u_long)m->nelem));
isbad = 1;
pip->h_nelem = 0;
} else
@@ -164,13 +164,14 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
mbucket = (1 << i) - 1;
if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) {
EPRINT((dbp->dbenv,
- "Spares array entry %lu, page %lu is invalid",
- i, pgno));
+ "Page %lu: spares array entry %d is invalid",
+ (u_long)pgno, i));
isbad = 1;
}
}
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -192,6 +193,7 @@ __ham_vrfy(dbp, vdp, h, pgno, flags)
{
VRFY_PAGEINFO *pip;
u_int32_t ent, himark, inpend;
+ db_indx_t *inp;
int isbad, ret, t_ret;
isbad = 0;
@@ -226,31 +228,33 @@ __ham_vrfy(dbp, vdp, h, pgno, flags)
* In any case, we return immediately if things are bad, as it would
* be unsafe to proceed.
*/
+ inp = P_INP(dbp, h);
for (ent = 0, himark = dbp->pgsize,
- inpend = (u_int8_t *)h->inp - (u_int8_t *)h;
+ inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h);
ent < NUM_ENT(h); ent++)
- if (h->inp[ent] >= himark) {
+ if (inp[ent] >= himark) {
EPRINT((dbp->dbenv,
- "Item %lu on page %lu out of order or nonsensical",
- ent, pgno));
+ "Page %lu: item %lu is out of order or nonsensical",
+ (u_long)pgno, (u_long)ent));
isbad = 1;
goto err;
} else if (inpend >= himark) {
EPRINT((dbp->dbenv,
- "inp array collided with data on page %lu",
- pgno));
+ "Page %lu: entries array collided with data",
+ (u_long)pgno));
isbad = 1;
goto err;
} else {
- himark = h->inp[ent];
+ himark = inp[ent];
inpend += sizeof(db_indx_t);
if ((ret = __ham_vrfy_item(
dbp, vdp, pgno, h, ent, flags)) != 0)
goto err;
}
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret);
}
@@ -279,7 +283,7 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
return (ret);
- switch (HPAGE_TYPE(h, i)) {
+ switch (HPAGE_TYPE(dbp, h, i)) {
case H_KEYDATA:
/* Nothing to do here--everything but the type field is data */
break;
@@ -287,8 +291,8 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
/* Are we a datum or a key? Better be the former. */
if (i % 2 == 0) {
EPRINT((dbp->dbenv,
- "Hash key stored as duplicate at page %lu item %lu",
- pip->pgno, i));
+ "Page %lu: hash key stored as duplicate item %lu",
+ (u_long)pip->pgno, (u_long)i));
}
/*
* Dups are encoded as a series within a single HKEYDATA,
@@ -300,16 +304,16 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
* Note that at this point, we've verified item i-1, so
* it's safe to use LEN_HKEYDATA (which looks at inp[i-1]).
*/
- len = LEN_HKEYDATA(h, dbp->pgsize, i);
- databuf = HKEYDATA_DATA(P_ENTRY(h, i));
+ len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
+ databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i));
for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) {
memcpy(&dlen, databuf + offset, sizeof(db_indx_t));
/* Make sure the length is plausible. */
if (offset + DUP_SIZE(dlen) > len) {
EPRINT((dbp->dbenv,
- "Duplicate item %lu, page %lu has bad length",
- i, pip->pgno));
+ "Page %lu: duplicate item %lu has bad length",
+ (u_long)pip->pgno, (u_long)i));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -323,8 +327,8 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
sizeof(db_indx_t));
if (elen != dlen) {
EPRINT((dbp->dbenv,
- "Duplicate item %lu, page %lu has two different lengths",
- i, pip->pgno));
+ "Page %lu: duplicate item %lu has two different lengths",
+ (u_long)pip->pgno, (u_long)i));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -336,12 +340,12 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
break;
case H_OFFPAGE:
/* Offpage item. Make sure pgno is sane, save off. */
- memcpy(&hop, P_ENTRY(h, i), HOFFPAGE_SIZE);
+ memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE);
if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno ||
hop.pgno == PGNO_INVALID) {
EPRINT((dbp->dbenv,
- "Offpage item %lu, page %lu has bad page number",
- i, pip->pgno));
+ "Page %lu: offpage item %lu has bad pgno %lu",
+ (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -354,12 +358,12 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
break;
case H_OFFDUP:
/* Offpage duplicate item. Same drill. */
- memcpy(&hod, P_ENTRY(h, i), HOFFDUP_SIZE);
+ memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE);
if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno ||
hod.pgno == PGNO_INVALID) {
EPRINT((dbp->dbenv,
- "Offpage item %lu, page %lu has bad page number",
- i, pip->pgno));
+ "Page %lu: offpage item %lu has bad page number",
+ (u_long)pip->pgno, (u_long)i));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -372,12 +376,14 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
break;
default:
EPRINT((dbp->dbenv,
- "Item %i, page %lu has bad type", i, pip->pgno));
+ "Page %lu: item %i has bad type",
+ (u_long)pip->pgno, (u_long)i));
ret = DB_VERIFY_BAD;
break;
}
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return (ret);
}
@@ -397,29 +403,32 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags)
u_int32_t flags;
{
DB *pgset;
+ DB_MPOOLFILE *mpf;
HMETA *m;
PAGE *h;
VRFY_PAGEINFO *pip;
int isbad, p, ret, t_ret;
db_pgno_t pgno;
- u_int32_t bucket;
+ u_int32_t bucket, spares_entry;
- ret = isbad = 0;
- h = NULL;
+ mpf = dbp->mpf;
pgset = vdp->pgset;
+ h = NULL;
+ ret = isbad = 0;
if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0)
return (ret);
if (p != 0) {
EPRINT((dbp->dbenv,
- "Hash meta page %lu referenced twice", meta_pgno));
+ "Page %lu: Hash meta page referenced twice",
+ (u_long)meta_pgno));
return (DB_VERIFY_BAD);
}
if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0)
return (ret);
/* Get the meta page; we'll need it frequently. */
- if ((ret = memp_fget(dbp->mpf, &meta_pgno, 0, &m)) != 0)
+ if ((ret = mpf->get(mpf, &meta_pgno, 0, &m)) != 0)
return (ret);
/* Loop through bucket by bucket. */
@@ -445,8 +454,8 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags)
* Note that this should be safe, since we've already verified
* that the spares array is sane.
*/
- for (bucket = m->max_bucket + 1;
- m->spares[__db_log2(bucket + 1)] != 0; bucket++) {
+ for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1),
+ spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) {
pgno = BS_TO_PAGE(bucket, m->spares);
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
goto err;
@@ -454,43 +463,51 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno, flags)
/* It's okay if these pages are totally zeroed; unmark it. */
F_CLR(pip, VRFY_IS_ALLZEROES);
+ /* It's also OK if this page is simply invalid. */
+ if (pip->type == P_INVALID) {
+ if ((ret = __db_vrfy_putpageinfo(dbp->dbenv,
+ vdp, pip)) != 0)
+ goto err;
+ continue;
+ }
+
if (pip->type != P_HASH) {
EPRINT((dbp->dbenv,
- "Hash bucket %lu maps to non-hash page %lu",
- bucket, pgno));
+ "Page %lu: hash bucket %lu maps to non-hash page",
+ (u_long)pgno, (u_long)bucket));
isbad = 1;
} else if (pip->entries != 0) {
EPRINT((dbp->dbenv,
- "Non-empty page %lu in unused hash bucket %lu",
- pgno, bucket));
+ "Page %lu: non-empty page in unused hash bucket %lu",
+ (u_long)pgno, (u_long)bucket));
isbad = 1;
} else {
if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
goto err;
if (p != 0) {
EPRINT((dbp->dbenv,
- "Hash page %lu above max_bucket referenced",
- pgno));
+ "Page %lu: above max_bucket referenced",
+ (u_long)pgno));
isbad = 1;
} else {
if ((ret =
__db_vrfy_pgset_inc(pgset, pgno)) != 0)
goto err;
- if ((ret =
- __db_vrfy_putpageinfo(vdp, pip)) != 0)
+ if ((ret = __db_vrfy_putpageinfo(dbp->dbenv,
+ vdp, pip)) != 0)
goto err;
continue;
}
}
/* If we got here, it's an error. */
- (void)__db_vrfy_putpageinfo(vdp, pip);
+ (void)__db_vrfy_putpageinfo(dbp->dbenv, vdp, pip);
goto err;
}
-err: if ((t_ret = memp_fput(dbp->mpf, m, 0)) != 0)
+err: if ((t_ret = mpf->put(mpf, m, 0)) != 0)
return (t_ret);
- if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0)
return (t_ret);
return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret);
}
@@ -535,8 +552,9 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
/* Make sure we got a plausible page number. */
if (pgno > vdp->last_pgno || pip->type != P_HASH) {
- EPRINT((dbp->dbenv, "Bucket %lu has impossible first page %lu",
- bucket, pgno));
+ EPRINT((dbp->dbenv,
+ "Page %lu: impossible first page in bucket %lu",
+ (u_long)pgno, (u_long)bucket));
/* Unsafe to continue. */
isbad = 1;
goto err;
@@ -544,7 +562,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
if (pip->prev_pgno != PGNO_INVALID) {
EPRINT((dbp->dbenv,
- "First hash page %lu in bucket %lu has a prev_pgno", pgno));
+ "Page %lu: first page in hash bucket %lu has a prev_pgno",
+ (u_long)pgno, (u_long)bucket));
isbad = 1;
}
@@ -564,7 +583,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
goto err;
if (p != 0) {
EPRINT((dbp->dbenv,
- "Hash page %lu referenced twice", pgno));
+ "Page %lu: hash page referenced twice",
+ (u_long)pgno));
isbad = 1;
/* Unsafe to continue. */
goto err;
@@ -584,11 +604,11 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
F_CLR(pip, VRFY_IS_ALLZEROES);
/* If we have dups, our meta page had better know about it. */
- if (F_ISSET(pip, VRFY_HAS_DUPS)
- && !F_ISSET(mip, VRFY_HAS_DUPS)) {
+ if (F_ISSET(pip, VRFY_HAS_DUPS) &&
+ !F_ISSET(mip, VRFY_HAS_DUPS)) {
EPRINT((dbp->dbenv,
- "Duplicates present in non-duplicate database, page %lu",
- pgno));
+ "Page %lu: duplicates present in non-duplicate database",
+ (u_long)pgno));
isbad = 1;
}
@@ -599,8 +619,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
if (F_ISSET(mip, VRFY_HAS_DUPSORT) &&
F_ISSET(pip, VRFY_DUPS_UNSORTED)) {
EPRINT((dbp->dbenv,
- "Unsorted dups in sorted-dup database, page %lu",
- pgno));
+ "Page %lu: unsorted dups in sorted-dup database",
+ (u_long)pgno));
isbad = 1;
}
@@ -625,8 +645,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
}
if ((ret = __bam_vrfy_subtree(dbp, vdp,
child->pgno, NULL, NULL,
- flags | ST_RECNUM | ST_DUPSET, NULL,
- NULL, NULL)) != 0) {
+ flags | ST_RECNUM | ST_DUPSET | ST_TOPLEVEL,
+ NULL, NULL, NULL)) != 0) {
if (ret == DB_VERIFY_BAD)
isbad = 1;
else
@@ -648,7 +668,7 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
}
next_pgno = pip->next_pgno;
- ret = __db_vrfy_putpageinfo(vdp, pip);
+ ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip);
pip = NULL;
if (ret != 0)
@@ -661,7 +681,8 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
if (!IS_VALID_PGNO(next_pgno)) {
DB_ASSERT(0);
EPRINT((dbp->dbenv,
- "Hash page %lu has bad next_pgno", pgno));
+ "Page %lu: hash page has bad next_pgno",
+ (u_long)pgno));
isbad = 1;
goto err;
}
@@ -670,8 +691,9 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
goto err;
if (pip->prev_pgno != pgno) {
- EPRINT((dbp->dbenv, "Hash page %lu has bad prev_pgno",
- next_pgno));
+ EPRINT((dbp->dbenv,
+ "Page %lu: hash page has bad prev_pgno",
+ (u_long)next_pgno));
isbad = 1;
}
pgno = next_pgno;
@@ -679,11 +701,11 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
ret = t_ret;
- if (mip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) &&
- ret == 0)
+ if (mip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0)
ret = t_ret;
- if (pip != NULL && ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) &&
- ret == 0)
+ if (pip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0)
ret = t_ret;
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -707,16 +729,19 @@ __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc)
u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
{
DBT dbt;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_indx_t i;
int ret, t_ret, isbad;
u_int32_t hval, bucket;
+ mpf = dbp->mpf;
ret = isbad = 0;
+
memset(&dbt, 0, sizeof(DBT));
F_SET(&dbt, DB_DBT_REALLOC);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
return (ret);
for (i = 0; i < nentries; i += 2) {
@@ -738,15 +763,15 @@ __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc)
if (bucket != thisbucket) {
EPRINT((dbp->dbenv,
- "Item %lu on page %lu hashes incorrectly",
- i, pgno));
+ "Page %lu: item %lu hashes incorrectly",
+ (u_long)pgno, (u_long)i));
isbad = 1;
}
}
err: if (dbt.data != NULL)
- __os_free(dbt.data, 0);
- if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ __os_ufree(dbp->dbenv, dbt.data);
+ if ((t_ret = mpf->put(mpf, h, 0)) != 0)
return (t_ret);
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
@@ -782,7 +807,7 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
dbt.flags = DB_DBT_REALLOC;
memset(&unkdbt, 0, sizeof(DBT));
- unkdbt.size = strlen("UNKNOWN") + 1;
+ unkdbt.size = (u_int32_t)strlen("UNKNOWN") + 1;
unkdbt.data = "UNKNOWN";
err_ret = 0;
@@ -791,7 +816,7 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
* Allocate a buffer for overflow items. Start at one page;
* __db_safe_goff will realloc as needed.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &buf)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &buf)) != 0)
return (ret);
himark = dbp->pgsize;
@@ -808,8 +833,8 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
break;
if (ret == 0) {
- hk = P_ENTRY(h, i);
- len = LEN_HKEYDATA(h, dbp->pgsize, i);
+ hk = P_ENTRY(dbp, h, i);
+ len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
if ((u_int32_t)(hk + len - (u_int8_t *)h) >
dbp->pgsize) {
/*
@@ -834,7 +859,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
dbt.size = len;
dbt.data = buf;
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
case H_OFFPAGE:
@@ -848,11 +873,11 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
dpgno, &dbt, &buf, flags)) != 0) {
err_ret = ret;
(void)__db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL);
+ handle, callback, 0, vdp);
break;
}
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
case H_OFFDUP:
@@ -865,7 +890,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
/* UNKNOWN iff pgno is bad or we're a key. */
if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) {
if ((ret = __db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL)) != 0)
+ handle, callback, 0, vdp)) != 0)
err_ret = ret;
} else if ((ret = __db_salvage_duptree(dbp,
vdp, dpgno, &dbt, handle, callback,
@@ -908,7 +933,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
dbt.size = dlen;
dbt.data = buf;
if ((ret = __db_prdbt(&dbt, 0, " ",
- handle, callback, 0, NULL)) != 0)
+ handle, callback, 0, vdp)) != 0)
err_ret = ret;
tlen += sizeof(db_indx_t);
}
@@ -917,7 +942,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
}
}
- __os_free(buf, 0);
+ __os_free(dbp->dbenv, buf);
if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
return (t_ret);
return ((ret == 0 && err_ret != 0) ? err_ret : ret);
@@ -938,6 +963,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
u_int32_t flags;
DB *pgset;
{
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
u_int32_t bucket, totpgs;
@@ -951,6 +977,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
DB_ASSERT(pgset != NULL);
+ mpf = dbp->mpf;
totpgs = 0;
/*
@@ -967,7 +994,7 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
* Safely walk the list of pages in this bucket.
*/
for (;;) {
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
return (ret);
if (TYPE(h) == P_HASH) {
@@ -976,24 +1003,26 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
* pgset.
*/
if (++totpgs > vdp->last_pgno) {
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
return (DB_VERIFY_BAD);
}
if ((ret =
- __db_vrfy_pgset_inc(pgset, pgno)) != 0)
+ __db_vrfy_pgset_inc(pgset, pgno)) != 0) {
+ (void)mpf->put(mpf, h, 0);
return (ret);
+ }
pgno = NEXT_PGNO(h);
} else
pgno = PGNO_INVALID;
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
return (ret);
/* If the new pgno is wonky, go onto the next bucket. */
if (!IS_VALID_PGNO(pgno) ||
pgno == PGNO_INVALID)
- goto nextbucket;
+ break;
/*
* If we've touched this page before, we have a cycle;
@@ -1002,9 +1031,8 @@ int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0)
return (ret);
if (val != 0)
- goto nextbucket;
+ break;
}
-nextbucket: ;
}
return (0);
}