summary refs log tree commit diff
path: root/bdb/btree
diff options
context:
space:
mode:
author unknown <ram@mysql.r18.ru> 2002-10-30 15:57:05 +0400
committer unknown <ram@mysql.r18.ru> 2002-10-30 15:57:05 +0400
commit 155e78f014de1a2e259ae5119f4621fbb210a784 (patch)
tree 6881a3cca88bea0bb9eeffd5aae34be437152786 /bdb/btree
parent b8798d25ab71436bf690ee8ae48285a655c5487e (diff)
download mariadb-git-155e78f014de1a2e259ae5119f4621fbb210a784.tar.gz
BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f: Delete: bdb/build_vxworks/ex_access/ex_access.wpj BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6: Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd: Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74: Delete: bdb/build_vxworks/ex_env/ex_env.wpj BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d: Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41: Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682: Delete: bdb/build_win32/db_buildall.dsp BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed: Delete: bdb/cxx/cxx_app.cpp BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952: Delete: bdb/cxx/cxx_log.cpp BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556: Delete: bdb/cxx/cxx_table.cpp BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd: Delete: bdb/cxx/namemap.txt BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373: Delete: bdb/db/Design.fileop BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef: Delete: bdb/db185/db185_int.h BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5: Delete: bdb/dist/acconfig.h BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1: Delete: bdb/dist/aclocal/mutex.m4 BitKeeper/deleted/.del-options.m4~b9d0ca637213750a: Delete: bdb/dist/aclocal/options.m4 BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30: Delete: bdb/dist/aclocal/programs.m4 BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db: Delete: bdb/dist/aclocal/tcl.m4 BitKeeper/deleted/.del-types.m4~59cae158c9a32cff: Delete: bdb/dist/aclocal/types.m4 BitKeeper/deleted/.del-script~d38f6d3a4f159cb4: Delete: bdb/dist/build/script BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c: Delete: bdb/dist/configure.in BitKeeper/deleted/.del-ltconfig~66bbd007d8024af: Delete: bdb/dist/ltconfig BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a: 
Delete: bdb/dist/rec_ctemp BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f: Delete: bdb/dist/s_tcl BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15: Delete: bdb/dist/template/.IGNORE_ME BitKeeper/deleted/.del-btree.h~179f2aefec1753d: Delete: bdb/include/btree.h BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8: Delete: bdb/include/cxx_int.h BitKeeper/deleted/.del-db.src~6b433ae615b16a8d: Delete: bdb/include/db.src BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c: Delete: bdb/include/db_185.h BitKeeper/deleted/.del-db_am.h~a714912b6b75932f: Delete: bdb/include/db_am.h BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9: Delete: bdb/include/db_cxx.h BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904: Delete: bdb/include/db_dispatch.h BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7: Delete: bdb/include/db_int.src BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a: Delete: bdb/include/db_join.h BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc: Delete: bdb/include/db_page.h BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab: Delete: bdb/include/db_server_int.h BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3: Delete: bdb/include/db_shash.h BitKeeper/deleted/.del-db_swap.h~1e60887550864a59: Delete: bdb/include/db_swap.h BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d: Delete: bdb/include/db_upgrade.h BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e: Delete: bdb/include/db_verify.h BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc: Delete: bdb/include/debug.h BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb: Delete: bdb/include/hash.h BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f: Delete: bdb/include/lock.h BitKeeper/deleted/.del-log.h~ff20184238e35e4d: Delete: bdb/include/log.h BitKeeper/deleted/.del-mp.h~7e317597622f3411: Delete: bdb/include/mp.h BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137: Delete: bdb/include/mutex.h BitKeeper/deleted/.del-os.h~91867cc8757cd0e3: Delete: bdb/include/os.h BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be: Delete: 
bdb/include/os_jump.h BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597: Delete: bdb/include/qam.h BitKeeper/deleted/.del-queue.h~4c72c0826c123d5: Delete: bdb/include/queue.h BitKeeper/deleted/.del-region.h~513fe04d977ca0fc: Delete: bdb/include/region.h BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36: Delete: bdb/include/shqueue.h BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f: Delete: bdb/include/tcl_db.h BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4: Delete: bdb/include/txn.h BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a: Delete: bdb/include/xa.h BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023: Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d: Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b: Delete: bdb/lock/lock_conflict.c BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2: Delete: bdb/log/log.src BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f: Delete: bdb/log/log_findckp.c BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297: Delete: bdb/log/log_rec.c BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15: Delete: bdb/log/log_register.c BitKeeper/deleted/.del-Design~b3d0f179f2767b: Delete: bdb/mp/Design BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c: Delete: bdb/os/os_finit.c BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924: Delete: bdb/os_vxworks/os_abs.c BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122: Delete: bdb/os_vxworks/os_finit.c BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3: Delete: bdb/os_vxworks/os_map.c BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad: Delete: bdb/os_win32/os_finit.c BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d: Delete: bdb/patches/log-corruption.patch BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm 
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs BitKeeper/deleted/.del-Changes~729c1891efa60de9: Delete: bdb/perl.BerkeleyDB/Changes BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0: Delete: bdb/perl.BerkeleyDB/MANIFEST BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a: Delete: bdb/perl.BerkeleyDB/Makefile.PL BitKeeper/deleted/.del-README~5f2f579b1a241407: Delete: bdb/perl.BerkeleyDB/README BitKeeper/deleted/.del-Todo~dca3c66c193adda9: Delete: bdb/perl.BerkeleyDB/Todo BitKeeper/deleted/.del-config.in~ae81681e450e0999: Delete: bdb/perl.BerkeleyDB/config.in BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e: Delete: bdb/perl.BerkeleyDB/dbinfo BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e: Delete: bdb/perl.BerkeleyDB/mkconsts BitKeeper/deleted/.del-mkpod~182c0ca54e439afb: Delete: bdb/perl.BerkeleyDB/mkpod BitKeeper/deleted/.del-5.004~e008cb5a48805543: Delete: bdb/perl.BerkeleyDB/patches/5.004 BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8: Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl BitKeeper/deleted/.del-solaris.pl~6771e7182394e152: Delete: bdb/perl.BerkeleyDB/hints/solaris.pl BitKeeper/deleted/.del-typemap~783b8f5295b05f3d: Delete: bdb/perl.BerkeleyDB/typemap BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc: Delete: bdb/perl.BerkeleyDB/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6: Delete: bdb/perl.BerkeleyDB/patches/5.004_02 BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f: Delete: bdb/perl.BerkeleyDB/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~e326cb51af09d154: Delete: bdb/perl.BerkeleyDB/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe: Delete: 
bdb/perl.BerkeleyDB/patches/5.004_05 BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b: Delete: bdb/perl.BerkeleyDB/patches/5.005 BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e: Delete: bdb/perl.BerkeleyDB/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb: Delete: bdb/perl.BerkeleyDB/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a: Delete: bdb/perl.BerkeleyDB/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22: Delete: bdb/perl.BerkeleyDB/patches/5.6.0 BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406: Delete: bdb/perl.BerkeleyDB/t/btree.t BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2: Delete: bdb/perl.BerkeleyDB/t/db-3.0.t BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018: Delete: bdb/perl.BerkeleyDB/t/db-3.1.t BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf: Delete: bdb/perl.BerkeleyDB/t/db-3.2.t BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd: Delete: bdb/perl.BerkeleyDB/t/destroy.t BitKeeper/deleted/.del-env.t~a8604a4499c4bd07: Delete: bdb/perl.BerkeleyDB/t/env.t BitKeeper/deleted/.del-examples.t~2571b77c3cc75574: Delete: bdb/perl.BerkeleyDB/t/examples.t BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88: Delete: bdb/perl.BerkeleyDB/t/examples.t.T BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d: Delete: bdb/perl.BerkeleyDB/t/examples3.t.T BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83: Delete: bdb/perl.BerkeleyDB/t/examples3.t BitKeeper/deleted/.del-filter.t~f87b045c1b708637: Delete: bdb/perl.BerkeleyDB/t/filter.t BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3: Delete: bdb/perl.BerkeleyDB/t/hash.t BitKeeper/deleted/.del-join.t~29fc39f74a83ca22: Delete: bdb/perl.BerkeleyDB/t/join.t BitKeeper/deleted/.del-mldbm.t~31f5015341eea040: Delete: bdb/perl.BerkeleyDB/t/mldbm.t BitKeeper/deleted/.del-queue.t~8f338034ce44a641: Delete: bdb/perl.BerkeleyDB/t/queue.t BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e: Delete: bdb/perl.BerkeleyDB/t/recno.t BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d: Delete: 
bdb/perl.BerkeleyDB/t/strict.t BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603: Delete: bdb/perl.BerkeleyDB/t/subdb.t BitKeeper/deleted/.del-txn.t~65033b8558ae1216: Delete: bdb/perl.BerkeleyDB/t/txn.t BitKeeper/deleted/.del-unknown.t~f3710458682665e1: Delete: bdb/perl.BerkeleyDB/t/unknown.t BitKeeper/deleted/.del-Changes~436f74a5c414c65b: Delete: bdb/perl.DB_File/Changes BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82: Delete: bdb/perl.DB_File/DB_File.pm BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8: Delete: bdb/perl.DB_File/DB_File.xs BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069: Delete: bdb/perl.DB_File/DB_File_BS BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac: Delete: bdb/perl.DB_File/MANIFEST BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38: Delete: bdb/perl.DB_File/Makefile.PL BitKeeper/deleted/.del-README~77e924a5a9bae6b3: Delete: bdb/perl.DB_File/README BitKeeper/deleted/.del-config.in~ab4c2792b86a810b: Delete: bdb/perl.DB_File/config.in BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb: Delete: bdb/perl.DB_File/dbinfo BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9: Delete: bdb/perl.DB_File/hints/dynixptx.pl BitKeeper/deleted/.del-typemap~55cffb3288a9e587: Delete: bdb/perl.DB_File/typemap BitKeeper/deleted/.del-version.c~a4df0e646f8b3975: Delete: bdb/perl.DB_File/version.c BitKeeper/deleted/.del-5.004_01~d6830d0082702af7: Delete: bdb/perl.DB_File/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~78b082dc80c91031: Delete: bdb/perl.DB_File/patches/5.004_02 BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b: Delete: bdb/perl.DB_File/patches/5.004 BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe: Delete: bdb/perl.DB_File/hints/sco.pl BitKeeper/deleted/.del-5.004_03~33f274648b160d95: Delete: bdb/perl.DB_File/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a: Delete: bdb/perl.DB_File/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142: Delete: bdb/perl.DB_File/patches/5.004_05 BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951: 
Delete: bdb/perl.DB_File/patches/5.005 BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa: Delete: bdb/perl.DB_File/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522: Delete: bdb/perl.DB_File/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea: Delete: bdb/perl.DB_File/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85: Delete: bdb/perl.DB_File/patches/5.6.0 BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3: Delete: bdb/perl.DB_File/t/db-btree.t BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2: Delete: bdb/perl.DB_File/t/db-hash.t BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50: Delete: bdb/perl.DB_File/t/db-recno.t BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2: Delete: bdb/rpc_server/db_server.sed BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4: Delete: bdb/rpc_server/db_server_proc.c BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607: Delete: bdb/rpc_server/db_server_svc.sed BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda: Delete: bdb/rpc_server/db_server_util.c BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7: Delete: bdb/test/log.tcl BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c: Delete: bdb/test/mpool.tcl BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565: Delete: bdb/test/mutex.tcl BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e: Delete: bdb/test/txn.tcl BitKeeper/deleted/.del-README~e800a12a5392010a: Delete: bdb/test/upgrade/README BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98: Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b: Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
Diffstat (limited to 'bdb/btree')
-rw-r--r-- bdb/btree/bt_compare.c 14
-rw-r--r-- bdb/btree/bt_conv.c 30
-rw-r--r-- bdb/btree/bt_curadj.c 55
-rw-r--r-- bdb/btree/bt_cursor.c 1193
-rw-r--r-- bdb/btree/bt_delete.c 186
-rw-r--r-- bdb/btree/bt_method.c 33
-rw-r--r-- bdb/btree/bt_open.c 425
-rw-r--r-- bdb/btree/bt_put.c 165
-rw-r--r-- bdb/btree/bt_rec.c 494
-rw-r--r-- bdb/btree/bt_reclaim.c 45
-rw-r--r-- bdb/btree/bt_recno.c 430
-rw-r--r-- bdb/btree/bt_rsearch.c 85
-rw-r--r-- bdb/btree/bt_search.c 92
-rw-r--r-- bdb/btree/bt_split.c 323
-rw-r--r-- bdb/btree/bt_stat.c 203
-rw-r--r-- bdb/btree/bt_upgrade.c 24
-rw-r--r-- bdb/btree/bt_verify.c 526
-rw-r--r-- bdb/btree/btree.src 158
18 files changed, 2546 insertions, 1935 deletions
diff --git a/bdb/btree/bt_compare.c b/bdb/btree/bt_compare.c
index 91481c31366..cbe2a1a7170 100644
--- a/bdb/btree/bt_compare.c
+++ b/bdb/btree/bt_compare.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krinsky Exp $";
+static const char revid[] = "$Id: bt_compare.c,v 11.17 2002/03/27 04:30:42 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -51,8 +51,8 @@ static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krins
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
/*
* __bam_cmp --
@@ -92,7 +92,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
case P_LBTREE:
case P_LDUP:
case P_LRECNO:
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW)
bo = (BOVERFLOW *)bk;
else {
@@ -125,7 +125,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
return (0);
}
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (B_TYPE(bi->type) == B_OVERFLOW)
bo = (BOVERFLOW *)(bi->data);
else {
@@ -136,7 +136,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
}
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
/*
diff --git a/bdb/btree/bt_conv.c b/bdb/btree/bt_conv.c
index fd30f375f7c..4264b62ffdd 100644
--- a/bdb/btree/bt_conv.c
+++ b/bdb/btree/bt_conv.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp $";
+static const char revid[] = "$Id: bt_conv.c,v 11.13 2002/08/06 06:11:12 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,20 +16,21 @@ static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
/*
* __bam_pgin --
* Convert host-specific page layout from the host-independent format
* stored on disk.
*
- * PUBLIC: int __bam_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __bam_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__bam_pgin(dbenv, pg, pp, cookie)
+__bam_pgin(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -38,12 +39,12 @@ __bam_pgin(dbenv, pg, pp, cookie)
PAGE *h;
pginfo = (DB_PGINFO *)cookie->data;
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
h = pp;
return (TYPE(h) == P_BTREEMETA ? __bam_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1));
}
/*
@@ -51,11 +52,12 @@ __bam_pgin(dbenv, pg, pp, cookie)
* Convert host-specific page layout to the host-independent format
* stored on disk.
*
- * PUBLIC: int __bam_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __bam_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__bam_pgout(dbenv, pg, pp, cookie)
+__bam_pgout(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -64,12 +66,12 @@ __bam_pgout(dbenv, pg, pp, cookie)
PAGE *h;
pginfo = (DB_PGINFO *)cookie->data;
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
h = pp;
return (TYPE(h) == P_BTREEMETA ? __bam_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0));
}
/*
@@ -93,6 +95,8 @@ __bam_mswap(pg)
SWAP32(p); /* re_len */
SWAP32(p); /* re_pad */
SWAP32(p); /* root */
+ p += 92 * sizeof(u_int32_t); /* unused */
+ SWAP32(p); /* crypto_magic */
return (0);
}
diff --git a/bdb/btree/bt_curadj.c b/bdb/btree/bt_curadj.c
index 011acd2f4a1..50d3d422e49 100644
--- a/bdb/btree/bt_curadj.c
+++ b/bdb/btree/bt_curadj.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic Exp $";
+static const char revid[] = "$Id: bt_curadj.c,v 11.30 2002/07/03 19:03:48 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,9 +16,8 @@ static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
@@ -203,10 +202,9 @@ __bam_ca_di(my_dbc, pgno, indx, adjust)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0)
return (ret);
}
@@ -234,8 +232,13 @@ __bam_opd_cursor(dbp, dbc, first, tpgno, ti)
* Allocate a new cursor and create the stack. If duplicates
* are sorted, we've just created an off-page duplicate Btree.
* If duplicates aren't sorted, we've just created a Recno tree.
+ *
+ * Note that in order to get here at all, there shouldn't be
+ * an old off-page dup cursor--to augment the checking db_c_newopd
+ * will do, assert this.
*/
- if ((ret = __db_c_newopd(dbc, tpgno, &dbc_nopd)) != 0)
+ DB_ASSERT(orig_cp->opd == NULL);
+ if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
return (ret);
cp = (BTREE_CURSOR *)dbc_nopd->internal;
@@ -321,17 +324,16 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
return (ret);
if (my_txn != NULL && dbc->txn != my_txn)
found = 1;
- /* We released the MUTEX to get a cursor, start over. */
+ /* We released the mutex to get a cursor, start over. */
goto loop;
}
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
return (ret);
}
return (0);
@@ -372,8 +374,16 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
orig_cp = (BTREE_CURSOR *)dbc->internal;
+ /*
+ * A note on the orig_cp->opd != NULL requirement here:
+ * it's possible that there's a cursor that refers to
+ * the same duplicate set, but which has no opd cursor,
+ * because it refers to a different item and we took
+ * care of it while processing a previous record.
+ */
if (orig_cp->pgno != fpgno ||
orig_cp->indx != first ||
+ orig_cp->opd == NULL ||
((BTREE_CURSOR *)orig_cp->opd->internal)->indx
!= ti)
continue;
@@ -383,7 +393,7 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
orig_cp->opd = NULL;
orig_cp->indx = fi;
/*
- * We released the MUTEX to free a cursor,
+ * We released the mutex to free a cursor,
* start over.
*/
goto loop;
@@ -440,10 +450,9 @@ __bam_ca_rsplit(my_dbc, fpgno, tpgno)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
return (ret);
}
return (0);
@@ -512,9 +521,9 @@ __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv, my_dbc->txn,
- &lsn, 0, dbp->log_fileid, DB_CA_SPLIT, ppgno, rpgno,
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp,
+ my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
return (ret);
}
diff --git a/bdb/btree/bt_cursor.c b/bdb/btree/bt_cursor.c
index 84ab7c80744..14d90e8873d 100644
--- a/bdb/btree/bt_cursor.c
+++ b/bdb/btree/bt_cursor.c
@@ -1,31 +1,29 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_cursor.c,v 11.88 2001/01/11 18:19:49 bostic Exp $";
+static const char revid[] = "$Id: bt_cursor.c,v 11.147 2002/08/13 20:46:07 ubell Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
-#include "qam.h"
-#include "common_ext.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+static int __bam_bulk __P((DBC *, DBT *, u_int32_t));
static int __bam_c_close __P((DBC *, db_pgno_t, int *));
static int __bam_c_del __P((DBC *));
static int __bam_c_destroy __P((DBC *));
@@ -33,15 +31,16 @@ static int __bam_c_first __P((DBC *));
static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
static int __bam_c_getstack __P((DBC *));
static int __bam_c_last __P((DBC *));
-static int __bam_c_next __P((DBC *, int));
+static int __bam_c_next __P((DBC *, int, int));
static int __bam_c_physdel __P((DBC *));
static int __bam_c_prev __P((DBC *));
static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
-static void __bam_c_reset __P((BTREE_CURSOR *));
-static int __bam_c_search __P((DBC *, const DBT *, u_int32_t, int *));
+static int __bam_c_search __P((DBC *,
+ db_pgno_t, const DBT *, u_int32_t, int *));
static int __bam_c_writelock __P((DBC *));
-static int __bam_getboth_finddatum __P((DBC *, DBT *));
+static int __bam_getboth_finddatum __P((DBC *, DBT *, u_int32_t));
static int __bam_getbothc __P((DBC *, DBT *));
+static int __bam_get_prev __P((DBC *));
static int __bam_isopd __P((DBC *, db_pgno_t *));
/*
@@ -53,48 +52,60 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
* don't -- we don't duplicate locks when we duplicate cursors if we are
* running in a transaction environment as there's no point if locks are
* never discarded. This means that the cursor may or may not hold a lock.
+ * In the case where we are descending the tree we always want to
+ * unlock the held interior page so we use ACQUIRE_COUPLE.
*/
#undef ACQUIRE
-#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) {\
+#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
if ((pagep) != NULL) { \
- ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \
+ ret = __mpf->put(__mpf, pagep, 0); \
+ pagep = NULL; \
+ } else \
+ ret = 0; \
+ if ((ret) == 0 && STD_LOCKING(dbc)) \
+ ret = __db_lget(dbc, LCK_COUPLE, lpgno, mode, 0, &(lock));\
+ if ((ret) == 0) \
+ ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \
+}
+
+#undef ACQUIRE_COUPLE
+#define ACQUIRE_COUPLE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
+ if ((pagep) != NULL) { \
+ ret = __mpf->put(__mpf, pagep, 0); \
pagep = NULL; \
} else \
ret = 0; \
if ((ret) == 0 && STD_LOCKING(dbc)) \
ret = __db_lget(dbc, \
- (lock).off == LOCK_INVALID ? 0 : LCK_COUPLE, \
- lpgno, mode, 0, &lock); \
- else \
- (lock).off = LOCK_INVALID; \
+ LCK_COUPLE_ALWAYS, lpgno, mode, 0, &(lock)); \
if ((ret) == 0) \
- ret = memp_fget((dbc)->dbp->mpf, &(fpgno), 0, &(pagep));\
+ ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \
}
/* Acquire a new page/lock for a cursor. */
#undef ACQUIRE_CUR
-#define ACQUIRE_CUR(dbc, mode, ret) { \
+#define ACQUIRE_CUR(dbc, mode, p, ret) { \
BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \
- ACQUIRE(dbc, mode, \
- __cp->pgno, __cp->lock, __cp->pgno, __cp->page, ret); \
- if ((ret) == 0) \
+ ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
+ if ((ret) == 0) { \
+ __cp->pgno = p; \
__cp->lock_mode = (mode); \
+ } \
}
/*
- * Acquire a new page/lock for a cursor, and move the cursor on success.
- * The reason that this is a separate macro is because we don't want to
- * set the pgno/indx fields in the cursor until we actually have the lock,
- * otherwise the cursor adjust routines will adjust the cursor even though
- * we're not really on the page.
+ * Acquire a new page/lock for a cursor and release the previous.
+ * This is typically used when descending a tree and we do not
+ * want to hold the interior nodes locked.
*/
-#undef ACQUIRE_CUR_SET
-#define ACQUIRE_CUR_SET(dbc, mode, p, ret) { \
+#undef ACQUIRE_CUR_COUPLE
+#define ACQUIRE_CUR_COUPLE(dbc, mode, p, ret) { \
BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \
- ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
+ ACQUIRE_COUPLE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
if ((ret) == 0) { \
- __cp->pgno = p; \
- __cp->indx = 0; \
+ __cp->pgno = p; \
__cp->lock_mode = (mode); \
} \
}
@@ -112,7 +123,7 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
if (STD_LOCKING(dbc) && \
__cp->lock_mode != DB_LOCK_WRITE && \
((ret) = __db_lget(dbc, \
- __cp->lock.off == LOCK_INVALID ? 0 : LCK_COUPLE, \
+ LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0, \
__cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) == 0) \
__cp->lock_mode = DB_LOCK_WRITE; \
}
@@ -120,19 +131,19 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
/* Discard the current page/lock. */
#undef DISCARD
#define DISCARD(dbc, ldiscard, lock, pagep, ret) { \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
int __t_ret; \
if ((pagep) != NULL) { \
- ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \
+ ret = __mpf->put(__mpf, pagep, 0); \
pagep = NULL; \
} else \
ret = 0; \
- if ((lock).off != LOCK_INVALID) { \
- __t_ret = ldiscard ? \
- __LPUT((dbc), lock): __TLPUT((dbc), lock); \
- if (__t_ret != 0 && (ret) == 0) \
- ret = __t_ret; \
- (lock).off = LOCK_INVALID; \
- } \
+ if (ldiscard) \
+ __t_ret = __LPUT((dbc), lock); \
+ else \
+ __t_ret = __TLPUT((dbc), lock); \
+ if (__t_ret != 0 && (ret) == 0) \
+ ret = __t_ret; \
}
/* Discard the current page/lock for a cursor. */
@@ -146,12 +157,12 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
/* If on-page item is a deleted record. */
#undef IS_DELETED
-#define IS_DELETED(page, indx) \
- B_DISSET(GET_BKEYDATA(page, \
+#define IS_DELETED(dbp, page, indx) \
+ B_DISSET(GET_BKEYDATA(dbp, page, \
(indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type)
#undef IS_CUR_DELETED
#define IS_CUR_DELETED(dbc) \
- IS_DELETED((dbc)->internal->page, (dbc)->internal->indx)
+ IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx)
/*
* Test to see if two cursors could point to duplicates of the same key.
@@ -163,8 +174,8 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
*/
#undef IS_DUPLICATE
#define IS_DUPLICATE(dbc, i1, i2) \
- (((PAGE *)(dbc)->internal->page)->inp[i1] == \
- ((PAGE *)(dbc)->internal->page)->inp[i2])
+ (P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] == \
+ P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2])
#undef IS_CUR_DUPLICATE
#define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx) \
(F_ISSET(dbc, DBC_OPD) || \
@@ -172,22 +183,6 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx)))
/*
- * __bam_c_reset --
- * Initialize internal cursor structure.
- */
-static void
-__bam_c_reset(cp)
- BTREE_CURSOR *cp;
-{
- cp->csp = cp->sp;
- cp->lock.off = LOCK_INVALID;
- cp->lock_mode = DB_LOCK_NG;
- cp->recno = RECNO_OOB;
- cp->order = INVALID_ORDER;
- cp->flags = 0;
-}
-
-/*
* __bam_c_init --
* Initialize the access private portion of a cursor
*
@@ -198,35 +193,26 @@ __bam_c_init(dbc, dbtype)
DBC *dbc;
DBTYPE dbtype;
{
- BTREE *t;
- BTREE_CURSOR *cp;
- DB *dbp;
+ DB_ENV *dbenv;
int ret;
- u_int32_t minkey;
- dbp = dbc->dbp;
+ dbenv = dbc->dbp->dbenv;
/* Allocate/initialize the internal structure. */
- if (dbc->internal == NULL) {
- if ((ret = __os_malloc(dbp->dbenv,
- sizeof(BTREE_CURSOR), NULL, &cp)) != 0)
- return (ret);
- dbc->internal = (DBC_INTERNAL *)cp;
-
- cp->sp = cp->csp = cp->stack;
- cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
- } else
- cp = (BTREE_CURSOR *)dbc->internal;
- __bam_c_reset(cp);
+ if (dbc->internal == NULL && (ret =
+ __os_malloc(dbenv, sizeof(BTREE_CURSOR), &dbc->internal)) != 0)
+ return (ret);
/* Initialize methods. */
dbc->c_close = __db_c_close;
dbc->c_count = __db_c_count;
dbc->c_del = __db_c_del;
dbc->c_dup = __db_c_dup;
- dbc->c_get = __db_c_get;
+ dbc->c_get = dbc->c_real_get = __db_c_get;
+ dbc->c_pget = __db_c_pget;
dbc->c_put = __db_c_put;
if (dbtype == DB_BTREE) {
+ dbc->c_am_bulk = __bam_bulk;
dbc->c_am_close = __bam_c_close;
dbc->c_am_del = __bam_c_del;
dbc->c_am_destroy = __bam_c_destroy;
@@ -234,6 +220,7 @@ __bam_c_init(dbc, dbtype)
dbc->c_am_put = __bam_c_put;
dbc->c_am_writelock = __bam_c_writelock;
} else {
+ dbc->c_am_bulk = __bam_bulk;
dbc->c_am_close = __bam_c_close;
dbc->c_am_del = __ram_c_del;
dbc->c_am_destroy = __bam_c_destroy;
@@ -242,18 +229,6 @@ __bam_c_init(dbc, dbtype)
dbc->c_am_writelock = __bam_c_writelock;
}
- /*
- * The btree leaf page data structures require that two key/data pairs
- * (or four items) fit on a page, but other than that there's no fixed
- * requirement. The btree off-page duplicates only require two items,
- * to be exact, but requiring four for them as well seems reasonable.
- *
- * Recno uses the btree bt_ovflsize value -- it's close enough.
- */
- t = dbp->bt_internal;
- minkey = F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey;
- cp->ovflsize = B_MINKEY_TO_OVFLSIZE(minkey, dbp->pgsize);
-
return (0);
}
@@ -267,12 +242,13 @@ int
__bam_c_refresh(dbc)
DBC *dbc;
{
+ BTREE *t;
BTREE_CURSOR *cp;
DB *dbp;
dbp = dbc->dbp;
+ t = dbp->bt_internal;
cp = (BTREE_CURSOR *)dbc->internal;
- __bam_c_reset(cp);
/*
* If our caller set the root page number, it's because the root was
@@ -280,11 +256,32 @@ __bam_c_refresh(dbc)
* pull it out of our internal information.
*/
if (cp->root == PGNO_INVALID)
- cp->root = ((BTREE *)dbp->bt_internal)->bt_root;
+ cp->root = t->bt_root;
+
+ LOCK_INIT(cp->lock);
+ cp->lock_mode = DB_LOCK_NG;
+
+ cp->sp = cp->csp = cp->stack;
+ cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
+
+ /*
+ * The btree leaf page data structures require that two key/data pairs
+ * (or four items) fit on a page, but other than that there's no fixed
+ * requirement. The btree off-page duplicates only require two items,
+ * to be exact, but requiring four for them as well seems reasonable.
+ *
+ * Recno uses the btree bt_ovflsize value -- it's close enough.
+ */
+ cp->ovflsize = B_MINKEY_TO_OVFLSIZE(
+ dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize);
+
+ cp->recno = RECNO_OOB;
+ cp->order = INVALID_ORDER;
+ cp->flags = 0;
/* Initialize for record numbers. */
if (F_ISSET(dbc, DBC_OPD) ||
- dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
+ dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) {
F_SET(cp, C_RECNUM);
/*
@@ -293,7 +290,7 @@ __bam_c_refresh(dbc)
* mutable record numbers.
*/
if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) ||
- F_ISSET(dbp, DB_BT_RECNUM | DB_RE_RENUMBER))
+ F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER))
F_SET(cp, C_RENUMBER);
}
@@ -313,11 +310,12 @@ __bam_c_close(dbc, root_pgno, rmroot)
BTREE_CURSOR *cp, *cp_opd, *cp_c;
DB *dbp;
DBC *dbc_opd, *dbc_c;
+ DB_MPOOLFILE *mpf;
PAGE *h;
- u_int32_t num;
int cdb_lock, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
cp_opd = (dbc_opd = cp->opd) == NULL ?
NULL : (BTREE_CURSOR *)dbc_opd->internal;
@@ -408,10 +406,10 @@ __bam_c_close(dbc, root_pgno, rmroot)
* We will not have been provided a root page number. Acquire
* one from the primary database.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0)
goto err;
- root_pgno = GET_BOVERFLOW(h, cp->indx + O_INDX)->pgno;
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno;
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
goto err;
dbc_c = dbc_opd;
@@ -453,18 +451,14 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal;
* info in __db_c_get--the OPD is also a WRITEDUP.
*/
if (CDB_LOCKING(dbp->dbenv)) {
- DB_ASSERT(!F_ISSET(dbc, DBC_OPD) || F_ISSET(dbc, DBC_WRITEDUP));
- if (!F_ISSET(dbc, DBC_WRITER)) {
- if ((ret =
- lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
+ if (F_ISSET(dbc, DBC_WRITEDUP | DBC_WRITECURSOR)) {
+ if ((ret = dbp->dbenv->lock_get(
+ dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
&dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
goto err;
cdb_lock = 1;
}
-
- cp_c->lock.off = LOCK_INVALID;
- if ((ret =
- memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
goto err;
goto delete;
@@ -480,9 +474,7 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal;
* is responsible for acquiring any necessary locks before calling us.
*/
if (F_ISSET(dbc, DBC_OPD)) {
- cp_c->lock.off = LOCK_INVALID;
- if ((ret =
- memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
goto err;
goto delete;
}
@@ -542,13 +534,13 @@ delete: /*
* in that case. So, if the off-page duplicate tree is empty at this
* point, we want to remove it.
*/
- if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &root_pgno, 0, &h)) != 0)
goto err;
- if ((num = NUM_ENT(h)) == 0) {
+ if (NUM_ENT(h) == 0) {
if ((ret = __db_free(dbc, h)) != 0)
goto err;
} else {
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
goto err;
goto done;
}
@@ -566,8 +558,7 @@ delete: /*
* the primary page.
*/
if (dbc_opd != NULL) {
- cp->lock.off = LOCK_INVALID;
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
if ((ret = __bam_c_physdel(dbc)) != 0)
goto err;
@@ -604,7 +595,7 @@ __bam_c_destroy(dbc)
DBC *dbc;
{
/* Discard the structures. */
- __os_free(dbc->internal, sizeof(BTREE_CURSOR));
+ __os_free(dbc->dbp->dbenv, dbc->internal);
return (0);
}
@@ -622,11 +613,13 @@ __bam_c_count(dbc, recnop)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
db_indx_t indx, top;
db_recno_t recno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -637,7 +630,7 @@ __bam_c_count(dbc, recnop)
* new locks, we have to have a read lock to even get here.
*/
if (cp->opd == NULL) {
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
/*
@@ -654,14 +647,14 @@ __bam_c_count(dbc, recnop)
break;
*recnop = recno;
} else {
- if ((ret = memp_fget(dbp->mpf,
- &cp->opd->internal->root, 0, &cp->page)) != 0)
+ if ((ret =
+ mpf->get(mpf, &cp->opd->internal->root, 0, &cp->page)) != 0)
return (ret);
*recnop = RE_NREC(cp->page);
}
- ret = memp_fput(dbp->mpf, cp->page, 0);
+ ret = mpf->put(mpf, cp->page, 0);
cp->page = NULL;
return (ret);
@@ -677,9 +670,11 @@ __bam_c_del(dbc)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -706,25 +701,27 @@ __bam_c_del(dbc)
goto err;
cp->page = cp->csp->page;
} else {
- ACQUIRE_CUR(dbc, DB_LOCK_WRITE, ret);
+ ACQUIRE_CUR(dbc, DB_LOCK_WRITE, cp->pgno, ret);
if (ret != 0)
goto err;
}
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cdel_log(dbp->dbenv, dbc->txn, &LSN(cp->page), 0,
- dbp->log_fileid, PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0)
- goto err;
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cdel_log(dbp, dbc->txn, &LSN(cp->page), 0,
+ PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0)
+ goto err;
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
/* Set the intent-to-delete flag on the page. */
if (TYPE(cp->page) == P_LBTREE)
- B_DSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type);
+ B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx + O_INDX)->type);
else
- B_DSET(GET_BKEYDATA(cp->page, cp->indx)->type);
+ B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type);
/* Mark the page dirty. */
- ret = memp_fset(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+ ret = mpf->set(mpf, cp->page, DB_MPOOL_DIRTY);
err: /*
* If we've been successful so far and the tree has record numbers,
@@ -736,7 +733,7 @@ err: /*
(void)__bam_stkrel(dbc, 0);
} else
if (cp->page != NULL &&
- (t_ret = memp_fput(dbp->mpf, cp->page, 0)) != 0 && ret == 0)
+ (t_ret = mpf->put(mpf, cp->page, 0)) != 0 && ret == 0)
ret = t_ret;
cp->page = NULL;
@@ -771,7 +768,7 @@ __bam_c_dup(orig_dbc, new_dbc)
* holding inside a transaction because all the locks are retained
* until the transaction commits or aborts.
*/
- if (orig->lock.off != LOCK_INVALID && orig_dbc->txn == NULL) {
+ if (LOCK_ISSET(orig->lock) && orig_dbc->txn == NULL) {
if ((ret = __db_lget(new_dbc,
0, new->pgno, new->lock_mode, 0, &new->lock)) != 0)
return (ret);
@@ -796,11 +793,13 @@ __bam_c_get(dbc, key, data, flags, pgnop)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
db_pgno_t orig_pgno;
db_indx_t orig_indx;
int exact, newopd, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
orig_pgno = cp->pgno;
orig_indx = cp->indx;
@@ -820,7 +819,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* write lock, but upgrading to a write lock has no better
* chance of succeeding now instead of later, so don't try.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
break;
case DB_FIRST:
@@ -829,9 +828,10 @@ __bam_c_get(dbc, key, data, flags, pgnop)
goto err;
break;
case DB_GET_BOTH:
+ case DB_GET_BOTH_RANGE:
/*
* There are two ways to get here based on DBcursor->c_get
- * with the DB_GET_BOTH flag set:
+ * with the DB_GET_BOTH/DB_GET_BOTH_RANGE flags set:
*
* 1. Searching a sorted off-page duplicate tree: do a tree
* search.
@@ -839,20 +839,34 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* 2. Searching btree: do a tree search. If it returns a
* reference to off-page duplicate tree, return immediately
* and let our caller deal with it. If the search doesn't
- * return a reference to off-page duplicate tree, start an
- * on-page search.
+ * return a reference to off-page duplicate tree, continue
+ * with an on-page search.
*/
if (F_ISSET(dbc, DBC_OPD)) {
if ((ret = __bam_c_search(
- dbc, data, DB_GET_BOTH, &exact)) != 0)
- goto err;
- if (!exact) {
- ret = DB_NOTFOUND;
+ dbc, PGNO_INVALID, data, flags, &exact)) != 0)
goto err;
+ if (flags == DB_GET_BOTH) {
+ if (!exact) {
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+ break;
}
+
+ /*
+ * We didn't require an exact match, so the search may
+ * have returned an entry past the end of the page,
+ * or we may be referencing a deleted record. If so,
+ * move to the next entry.
+ */
+ if ((cp->indx == NUM_ENT(cp->page) ||
+ IS_CUR_DELETED(dbc)) &&
+ (ret = __bam_c_next(dbc, 1, 0)) != 0)
+ goto err;
} else {
if ((ret = __bam_c_search(
- dbc, key, DB_GET_BOTH, &exact)) != 0)
+ dbc, PGNO_INVALID, key, flags, &exact)) != 0)
return (ret);
if (!exact) {
ret = DB_NOTFOUND;
@@ -863,7 +877,8 @@ __bam_c_get(dbc, key, data, flags, pgnop)
newopd = 1;
break;
}
- if ((ret = __bam_getboth_finddatum(dbc, data)) != 0)
+ if ((ret =
+ __bam_getboth_finddatum(dbc, data, flags)) != 0)
goto err;
}
break;
@@ -882,11 +897,11 @@ __bam_c_get(dbc, key, data, flags, pgnop)
if ((ret = __bam_c_first(dbc)) != 0)
goto err;
} else
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
break;
case DB_NEXT_DUP:
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) {
ret = DB_NOTFOUND;
@@ -900,7 +915,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
goto err;
} else
do {
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
} while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx));
break;
@@ -927,12 +942,14 @@ __bam_c_get(dbc, key, data, flags, pgnop)
case DB_SET:
case DB_SET_RECNO:
newopd = 1;
- if ((ret = __bam_c_search(dbc, key, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ PGNO_INVALID, key, flags, &exact)) != 0)
goto err;
break;
case DB_SET_RANGE:
newopd = 1;
- if ((ret = __bam_c_search(dbc, key, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ PGNO_INVALID, key, flags, &exact)) != 0)
goto err;
/*
@@ -942,7 +959,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* the next entry.
*/
if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc))
- if ((ret = __bam_c_next(dbc, 0)) != 0)
+ if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
goto err;
break;
default:
@@ -957,8 +974,15 @@ __bam_c_get(dbc, key, data, flags, pgnop)
if (newopd && pgnop != NULL)
(void)__bam_isopd(dbc, pgnop);
- /* Don't return the key, it was passed to us */
- if (flags == DB_SET)
+ /*
+ * Don't return the key, it was passed to us (this is true even if the
+ * application defines a compare function returning equality for more
+ * than one key value, since in that case which actual value we store
+ * in the database is undefined -- and particularly true in the case of
+ * duplicates where we only store one key value).
+ */
+ if (flags == DB_GET_BOTH ||
+ flags == DB_GET_BOTH_RANGE || flags == DB_SET)
F_SET(key, DB_DBT_ISSET);
err: /*
@@ -966,13 +990,596 @@ err: /*
* moved, clear the delete flag, DBcursor->c_get never references
* a deleted key, if it moved at all.
*/
- if (F_ISSET(cp, C_DELETED)
- && (cp->pgno != orig_pgno || cp->indx != orig_indx))
+ if (F_ISSET(cp, C_DELETED) &&
+ (cp->pgno != orig_pgno || cp->indx != orig_indx))
F_CLR(cp, C_DELETED);
return (ret);
}
+static int
+__bam_get_prev(dbc)
+ DBC *dbc;
+{
+ BTREE_CURSOR *cp;
+ DBT key, data;
+ db_pgno_t pgno;
+ int ret;
+
+ if ((ret = __bam_c_prev(dbc)) != 0)
+ return (ret);
+
+ if (__bam_isopd(dbc, &pgno)) {
+ cp = (BTREE_CURSOR *)dbc->internal;
+ if ((ret = __db_c_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0)
+ return (ret);
+ if ((ret = cp->opd->c_am_get(cp->opd,
+ &key, &data, DB_LAST, NULL)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * __bam_bulk -- Return bulk data from a btree.
+ */
+static int
+__bam_bulk(dbc, data, flags)
+ DBC *dbc;
+ DBT *data;
+ u_int32_t flags;
+{
+ BKEYDATA *bk;
+ BOVERFLOW *bo;
+ BTREE_CURSOR *cp;
+ PAGE *pg;
+ db_indx_t *inp, indx, pg_keyoff;
+ int32_t *endp, key_off, *offp, *saveoffp;
+ u_int8_t *dbuf, *dp, *np;
+ u_int32_t key_size, size, space;
+ int adj, is_key, need_pg, next_key, no_dup;
+ int pagesize, rec_key, ret;
+
+ ret = 0;
+ key_off = 0;
+ size = 0;
+ pagesize = dbc->dbp->pgsize;
+ cp = (BTREE_CURSOR *)dbc->internal;
+
+ /*
+ * dp tracks the beginning of the page in the buffer.
+ * np is the next place to copy things into the buffer.
+ * dbuf always stays at the beginning of the buffer.
+ */
+ dbuf = data->data;
+ np = dp = dbuf;
+
+ /* Keep track of space that is left; reserve the termination entry. */
+ space = data->ulen;
+ space -= sizeof(*offp);
+
+ /* Build the offset/size table from the end up. */
+ endp = (int32_t *)((u_int8_t *)dbuf + data->ulen);
+ endp--;
+ offp = endp;
+
+ key_size = 0;
+
+ /*
+ * Distinguish between BTREE and RECNO.
+ * There are no keys in RECNO. If MULTIPLE_KEY is specified
+ * then we return the record numbers.
+ * is_key indicates that multiple btree keys are returned.
+ * rec_key is set if we are returning record numbers.
+ * next_key is set if we are going after the next key rather than dup.
+ */
+ if (dbc->dbtype == DB_BTREE) {
+ is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1: 0;
+ rec_key = 0;
+ next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
+ adj = 2;
+ } else {
+ is_key = 0;
+ rec_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
+ next_key = LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
+ adj = 1;
+ }
+ no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
+
+next_pg:
+ indx = cp->indx;
+ pg = cp->page;
+
+ inp = P_INP(dbc->dbp, pg);
+ /* The current page is not yet in the buffer. */
+ need_pg = 1;
+
+ /*
+ * Keep track of the offset of the current key on the page.
+ * If we are returning keys, set it to 0 first so we force
+ * the copy of the key to the buffer.
+ */
+ pg_keyoff = 0;
+ if (is_key == 0)
+ pg_keyoff = inp[indx];
+
+ do {
+ if (IS_DELETED(dbc->dbp, pg, indx)) {
+ if (dbc->dbtype != DB_RECNO)
+ continue;
+
+ cp->recno++;
+ /*
+ * If we are not returning recnos then we
+ * need to fill in every slot so the user
+ * can calculate the record numbers.
+ */
+ if (rec_key != 0)
+ continue;
+
+ space -= 2 * sizeof(*offp);
+ /* Check if space has underflowed. */
+ if (space > data->ulen)
+ goto back_up;
+
+ /* Just mark the empty recno slots. */
+ *offp-- = 0;
+ *offp-- = 0;
+ continue;
+ }
+
+ /*
+ * Check to see if we have a new key.
+ * If so, then see if we need to put the
+ * key on the page. If it's already there
+ * then we just point to it.
+ */
+ if (is_key && pg_keyoff != inp[indx]) {
+ bk = GET_BKEYDATA(dbc->dbp, pg, indx);
+ if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = key_size = bo->tlen;
+ if (key_size > space)
+ goto get_key_space;
+ if ((ret = __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= key_size;
+ key_off = (int32_t)(np - dbuf);
+ np += key_size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+get_key_space:
+ /* Nothing added, then error. */
+ if (offp == endp) {
+ data->size =
+ ALIGN(size +
+ pagesize,
+ sizeof(u_int32_t));
+ return (ENOMEM);
+ }
+ /*
+ * We need to back up to the
+ * last record put into the
+ * buffer so that it is
+ * CURRENT.
+ */
+ if (indx != 0)
+ indx -= P_INDX;
+ else {
+ if ((ret =
+ __bam_get_prev(
+ dbc)) != 0)
+ return (ret);
+ indx = cp->indx;
+ pg = cp->page;
+ }
+ break;
+ }
+ /*
+ * Move the data part of the page
+ * to the buffer.
+ */
+ memcpy(dp,
+ (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ key_size = bk->len;
+ key_off = (int32_t)(inp[indx] - HOFFSET(pg)
+ + dp - dbuf + SSZA(BKEYDATA, data));
+ pg_keyoff = inp[indx];
+ }
+ }
+
+ /*
+ * Reserve space for the pointers and sizes.
+ * Either key/data pair or just for a data item.
+ */
+ space -= (is_key ? 4 : 2) * sizeof(*offp);
+ if (rec_key)
+ space -= sizeof(*offp);
+
+ /* Check to see if space has underflowed. */
+ if (space > data->ulen)
+ goto back_up;
+
+ /*
+ * Determine if the next record is in the
+ * buffer already or if it needs to be copied in.
+ * If we have an off page dup, then copy as many
+ * as will fit into the buffer.
+ */
+ bk = GET_BKEYDATA(dbc->dbp, pg, indx + adj - 1);
+ if (B_TYPE(bk->type) == B_DUPLICATE) {
+ bo = (BOVERFLOW *)bk;
+ if (is_key) {
+ *offp-- = key_off;
+ *offp-- = key_size;
+ }
+ /*
+ * We pass the offset of the current key.
+ * On return we check to see if offp has
+ * moved to see if any data fit.
+ */
+ saveoffp = offp;
+ if ((ret = __bam_bulk_duplicates(dbc, bo->pgno,
+ dbuf, is_key ? offp + P_INDX : NULL,
+ &offp, &np, &space, no_dup)) != 0) {
+ if (ret == ENOMEM) {
+ size = space;
+ /* If nothing was added, then error. */
+ if (offp == saveoffp) {
+ offp += 2;
+ goto back_up;
+ }
+ goto get_space;
+ }
+ return (ret);
+ }
+ } else if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = bo->tlen;
+ if (size > space)
+ goto back_up;
+ if ((ret =
+ __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= size;
+ if (is_key) {
+ *offp-- = key_off;
+ *offp-- = key_size;
+ } else if (rec_key)
+ *offp-- = cp->recno;
+ *offp-- = (int32_t)(np - dbuf);
+ np += size;
+ *offp-- = size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+back_up:
+ /*
+ * Back up the index so that the
+ * last record in the buffer is CURRENT
+ */
+ if (indx >= adj)
+ indx -= adj;
+ else {
+ if ((ret =
+ __bam_get_prev(dbc)) != 0 &&
+ ret != DB_NOTFOUND)
+ return (ret);
+ indx = cp->indx;
+ pg = cp->page;
+ }
+ if (dbc->dbtype == DB_RECNO)
+ cp->recno--;
+get_space:
+ /*
+ * Fail if nothing was put in the
+ * buffer, or if this is a DBP->get
+ * and not all of the data fit.
+ */
+ if (offp >=
+ (is_key ? &endp[-1] : endp) ||
+ F_ISSET(dbc, DBC_TRANSIENT)) {
+ data->size = ALIGN(size +
+ data->ulen - space,
+ sizeof(u_int32_t));
+ return (ENOMEM);
+ }
+ break;
+ }
+ memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ /*
+ * Add the offsets and sizes to the end of the buffer.
+ * First add the key info then the data info.
+ */
+ if (is_key) {
+ *offp-- = key_off;
+ *offp-- = key_size;
+ } else if (rec_key)
+ *offp-- = cp->recno;
+ *offp-- = (int32_t)(inp[indx + adj - 1] - HOFFSET(pg)
+ + dp - dbuf + SSZA(BKEYDATA, data));
+ *offp-- = bk->len;
+ }
+ if (dbc->dbtype == DB_RECNO)
+ cp->recno++;
+ else if (no_dup) {
+ while (indx + adj < NUM_ENT(pg) &&
+ pg_keyoff == inp[indx + adj])
+ indx += adj;
+ }
+ /*
+ * Stop when we either run off the page or we
+ * move to the next key and we are not returning multiple keys.
+ */
+ } while ((indx += adj) < NUM_ENT(pg) &&
+ (next_key || pg_keyoff == inp[indx]));
+
+ /* If we are off the page then try the next page. */
+ if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
+ cp->indx = indx;
+ ret = __bam_c_next(dbc, 0, 1);
+ if (ret == 0)
+ goto next_pg;
+ if (ret != DB_NOTFOUND)
+ return (ret);
+ }
+
+ /*
+ * If we did a DBP->get we must error if we did not return
+ * all the data for the current key because there is
+ * no way to know if we did not get it all, nor any
+ * interface to fetch the balance.
+ */
+
+ if (ret == 0 &&
+ F_ISSET(dbc, DBC_TRANSIENT) && pg_keyoff == inp[indx]) {
+ data->size = (data->ulen - space) + size;
+ return (ENOMEM);
+ }
+ /*
+ * Must leave the index pointing at the last record fetched.
+ * If we are not fetching keys, we may have stepped to the
+ * next key.
+ */
+ if (next_key || pg_keyoff == inp[indx])
+ cp->indx = indx;
+ else
+ cp->indx = indx - P_INDX;
+
+ if (rec_key == 1)
+ *offp = (u_int32_t) RECNO_OOB;
+ else
+ *offp = (u_int32_t) -1;
+ return (0);
+}
+
+/*
+ * __bam_bulk_overflow --
+ * Dump overflow record into the buffer.
+ * The space requirements have already been checked.
+ * PUBLIC: int __bam_bulk_overflow
+ * PUBLIC: __P((DBC *, u_int32_t, db_pgno_t, u_int8_t *));
+ */
+int
+__bam_bulk_overflow(dbc, len, pgno, dp)
+ DBC *dbc;
+ u_int32_t len;
+ db_pgno_t pgno;
+ u_int8_t *dp;
+{
+ DBT dbt;
+
+ memset(&dbt, 0, sizeof(dbt));
+ F_SET(&dbt, DB_DBT_USERMEM);
+ dbt.ulen = len;
+ dbt.data = (void *)dp;
+ return (__db_goff(dbc->dbp, &dbt, len, pgno, NULL, NULL));
+}
+
+/*
+ * __bam_bulk_duplicates --
+ * Put as many off page duplicates as will fit into the buffer.
+ * This routine will adjust the cursor to reflect the position in
+ * the overflow tree.
+ * PUBLIC: int __bam_bulk_duplicates __P((DBC *,
+ * PUBLIC: db_pgno_t, u_int8_t *, int32_t *,
+ * PUBLIC: int32_t **, u_int8_t **, u_int32_t *, int));
+ */
+int
+__bam_bulk_duplicates(dbc, pgno, dbuf, keyoff, offpp, dpp, spacep, no_dup)
+ DBC *dbc;
+ db_pgno_t pgno;
+ u_int8_t *dbuf;
+ int32_t *keyoff, **offpp;
+ u_int8_t **dpp;
+ u_int32_t *spacep;
+ int no_dup;
+{
+ DB *dbp;
+ BKEYDATA *bk;
+ BOVERFLOW *bo;
+ BTREE_CURSOR *cp;
+ DBC *opd;
+ DBT key, data;
+ PAGE *pg;
+ db_indx_t indx, *inp;
+ int32_t *offp;
+ u_int32_t size, space;
+ u_int8_t *dp, *np;
+ int first, need_pg, pagesize, ret, t_ret;
+
+ ret = 0;
+
+ dbp = dbc->dbp;
+ cp = (BTREE_CURSOR *)dbc->internal;
+ opd = cp->opd;
+
+ if (opd == NULL) {
+ if ((ret = __db_c_newopd(dbc, pgno, NULL, &opd)) != 0)
+ return (ret);
+ cp->opd = opd;
+ if ((ret = opd->c_am_get(opd,
+ &key, &data, DB_FIRST, NULL)) != 0)
+ return (ret);
+ }
+
+ pagesize = opd->dbp->pgsize;
+ cp = (BTREE_CURSOR *)opd->internal;
+ space = *spacep;
+ /* Get current offset slot. */
+ offp = *offpp;
+
+ /*
+ * np is the next place to put data.
+ * dp is the beginning of the current page in the buffer.
+ */
+ np = dp = *dpp;
+ first = 1;
+ indx = cp->indx;
+
+ do {
+ /* Fetch the current record. No initial move. */
+ if ((ret = __bam_c_next(opd, 0, 0)) != 0)
+ break;
+ pg = cp->page;
+ indx = cp->indx;
+ inp = P_INP(dbp, pg);
+ /* We need to copy the page to the buffer. */
+ need_pg = 1;
+
+ do {
+ if (IS_DELETED(dbp, pg, indx))
+ goto contin;
+ bk = GET_BKEYDATA(dbp, pg, indx);
+ space -= 2 * sizeof(*offp);
+ /* Allocate space for key if needed. */
+ if (first == 0 && keyoff != NULL)
+ space -= 2 * sizeof(*offp);
+
+ /* Did space underflow? */
+ if (space > *spacep) {
+ ret = ENOMEM;
+ if (first == 1) {
+ space = *spacep + -(int32_t)space;
+ if (need_pg)
+ space += pagesize - HOFFSET(pg);
+ }
+ break;
+ }
+ if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = bo->tlen;
+ if (size > space) {
+ ret = ENOMEM;
+ if (first == 1) {
+ space = *spacep + size;
+ }
+ break;
+ }
+ if (first == 0 && keyoff != NULL) {
+ *offp-- = keyoff[0];
+ *offp-- = keyoff[-1];
+ }
+ if ((ret = __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= size;
+ *offp-- = (int32_t)(np - dbuf);
+ np += size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+ ret = ENOMEM;
+ /* Return space required. */
+ if (first == 1) {
+ space = *spacep + size;
+ }
+ break;
+ }
+ memcpy(dp,
+ (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ if (first == 0 && keyoff != NULL) {
+ *offp-- = keyoff[0];
+ *offp-- = keyoff[-1];
+ }
+ size = bk->len;
+ *offp-- = (int32_t)(inp[indx] - HOFFSET(pg)
+ + dp - dbuf + SSZA(BKEYDATA, data));
+ }
+ *offp-- = size;
+ first = 0;
+ if (no_dup)
+ break;
+contin:
+ indx++;
+ if (opd->dbtype == DB_RECNO)
+ cp->recno++;
+ } while (indx < NUM_ENT(pg));
+ if (no_dup)
+ break;
+ cp->indx = indx;
+
+ } while (ret == 0);
+
+ /* Return the updated information. */
+ *spacep = space;
+ *offpp = offp;
+ *dpp = np;
+
+ /*
+ * If we ran out of space back up the pointer.
+ * If we did not return any dups or reached the end, close the opd.
+ */
+ if (ret == ENOMEM) {
+ if (opd->dbtype == DB_RECNO) {
+ if (--cp->recno == 0)
+ goto close_opd;
+ } else if (indx != 0)
+ cp->indx--;
+ else {
+ t_ret = __bam_c_prev(opd);
+ if (t_ret == DB_NOTFOUND)
+ goto close_opd;
+ if (t_ret != 0)
+ ret = t_ret;
+ }
+ } else if (keyoff == NULL && ret == DB_NOTFOUND) {
+ cp->indx--;
+ if (opd->dbtype == DB_RECNO)
+ --cp->recno;
+ } else if (indx == 0 || ret == DB_NOTFOUND) {
+close_opd:
+ opd->c_close(opd);
+ ((BTREE_CURSOR *)dbc->internal)->opd = NULL;
+ }
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+
+ return (ret);
+}
+
/*
* __bam_getbothc --
* Search for a matching data item on a join.
@@ -984,9 +1591,11 @@ __bam_getbothc(dbc, data)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
int cmp, exact, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -995,7 +1604,7 @@ __bam_getbothc(dbc, data)
* write lock, but upgrading to a write lock has no better
* chance of succeeding now instead of later, so don't try.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
/*
@@ -1017,11 +1626,12 @@ __bam_getbothc(dbc, data)
return (DB_NOTFOUND);
/* Discard the current page, we're going to do a full search. */
- if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
+ if ((ret = mpf->put(mpf, cp->page, 0)) != 0)
return (ret);
cp->page = NULL;
- return (__bam_c_search(dbc, data, DB_GET_BOTH, &exact));
+ return (__bam_c_search(dbc,
+ PGNO_INVALID, data, DB_GET_BOTH, &exact));
}
/*
@@ -1038,7 +1648,7 @@ __bam_getbothc(dbc, data)
return (DB_NOTFOUND);
cp->indx += P_INDX;
- return (__bam_getboth_finddatum(dbc, data));
+ return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH));
}
/*
@@ -1046,9 +1656,10 @@ __bam_getbothc(dbc, data)
* Find a matching on-page data item.
*/
static int
-__bam_getboth_finddatum(dbc, data)
+__bam_getboth_finddatum(dbc, data, flags)
DBC *dbc;
DBT *data;
+ u_int32_t flags;
{
BTREE_CURSOR *cp;
DB *dbp;
@@ -1060,17 +1671,14 @@ __bam_getboth_finddatum(dbc, data)
/*
* Called (sometimes indirectly) from DBC->get to search on-page data
- * item(s) for a matching value. If the original flag was DB_GET_BOTH,
- * the cursor argument is set to the first data item for the key. If
- * the original flag was DB_GET_BOTHC, the cursor argument is set to
- * the first data item that we can potentially return. In both cases,
- * there may or may not be additional duplicate data items to search.
+ * item(s) for a matching value. If the original flag was DB_GET_BOTH
+ * or DB_GET_BOTH_RANGE, the cursor is set to the first undeleted data
+ * item for the key. If the original flag was DB_GET_BOTHC, the cursor
+ * argument is set to the first data item we can potentially return.
+ * In both cases, there may or may not be additional duplicate data
+ * items to search.
*
* If the duplicates are not sorted, do a linear search.
- *
- * If the duplicates are sorted, do a binary search. The reason for
- * this is that large pages and small key/data pairs result in large
- * numbers of on-page duplicates before they get pushed off-page.
*/
if (dbp->dup_compare == NULL) {
for (;; cp->indx += P_INDX) {
@@ -1085,41 +1693,62 @@ __bam_getboth_finddatum(dbc, data)
!IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
break;
}
- } else {
- /*
- * Find the top and bottom of the duplicate set. Binary search
- * requires at least two items, don't loop if there's only one.
- */
- for (base = top = cp->indx;
- top < NUM_ENT(cp->page); top += P_INDX)
- if (!IS_DUPLICATE(dbc, cp->indx, top))
- break;
- if (base == (top - P_INDX)) {
- if ((ret = __bam_cmp(dbp, data,
- cp->page, cp->indx + O_INDX,
- dbp->dup_compare, &cmp)) != 0)
- return (ret);
- return (cmp == 0 ? 0 : DB_NOTFOUND);
- }
+ return (DB_NOTFOUND);
+ }
- for (lim =
- (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
- cp->indx = base + ((lim >> 1) * P_INDX);
- if ((ret = __bam_cmp(dbp, data, cp->page,
- cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
- return (ret);
- if (cmp == 0) {
- if (!IS_CUR_DELETED(dbc))
- return (0);
- break;
- }
- if (cmp > 0) {
- base = cp->indx + P_INDX;
- --lim;
- }
+ /*
+ * If the duplicates are sorted, do a binary search. The reason for
+ * this is that large pages and small key/data pairs result in large
+ * numbers of on-page duplicates before they get pushed off-page.
+ *
+ * Find the top and bottom of the duplicate set. Binary search
+ * requires at least two items, don't loop if there's only one.
+ */
+ for (base = top = cp->indx; top < NUM_ENT(cp->page); top += P_INDX)
+ if (!IS_DUPLICATE(dbc, cp->indx, top))
+ break;
+ if (base == (top - P_INDX)) {
+ if ((ret = __bam_cmp(dbp, data,
+ cp->page, cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ return (ret);
+ return (cmp == 0 ||
+ (cmp < 0 && flags == DB_GET_BOTH_RANGE) ? 0 : DB_NOTFOUND);
+ }
+
+ for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
+ cp->indx = base + ((lim >> 1) * P_INDX);
+ if ((ret = __bam_cmp(dbp, data, cp->page,
+ cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ return (ret);
+ if (cmp == 0) {
+ /*
+ * XXX
+ * No duplicate duplicates in sorted duplicate sets,
+ * so there can be only one.
+ */
+ if (!IS_CUR_DELETED(dbc))
+ return (0);
+ break;
+ }
+ if (cmp > 0) {
+ base = cp->indx + P_INDX;
+ --lim;
}
}
- return (DB_NOTFOUND);
+
+ /* No match found; if we're looking for an exact match, we're done. */
+ if (flags == DB_GET_BOTH)
+ return (DB_NOTFOUND);
+
+ /*
+ * Base is the smallest index greater than the data item, may be zero
+ * or a last + O_INDX index, and may be deleted. Find an undeleted
+ * item.
+ */
+ cp->indx = base;
+ while (cp->indx < top && IS_CUR_DELETED(dbc))
+ cp->indx += P_INDX;
+ return (cp->indx < top ? 0 : DB_NOTFOUND);
}
/*
@@ -1136,19 +1765,22 @@ __bam_c_put(dbc, key, data, flags, pgnop)
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
+ db_pgno_t root_pgno;
u_int32_t iiop;
- int cmp, exact, needkey, ret, stack;
+ int cmp, exact, ret, stack;
void *arg;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
+ root_pgno = cp->root;
-split: needkey = ret = stack = 0;
+split: ret = stack = 0;
switch (flags) {
case DB_AFTER:
case DB_BEFORE:
case DB_CURRENT:
- needkey = 1;
iiop = flags;
/*
@@ -1182,7 +1814,7 @@ split: needkey = ret = stack = 0;
ACQUIRE_WRITE_LOCK(dbc, ret);
if (ret != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
break;
case DB_KEYFIRST:
@@ -1192,15 +1824,22 @@ split: needkey = ret = stack = 0;
* Searching off-page, sorted duplicate tree: do a tree search
* for the correct item; __bam_c_search returns the smallest
* slot greater than the key, use it.
+ *
+ * See comment below regarding where we can start the search.
*/
if (F_ISSET(dbc, DBC_OPD)) {
- if ((ret =
- __bam_c_search(dbc, data, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno,
+ data, flags, &exact)) != 0)
goto err;
stack = 1;
/* Disallow "sorted" duplicate duplicates. */
if (exact) {
+ if (IS_DELETED(dbp, cp->page, cp->indx)) {
+ iiop = DB_CURRENT;
+ break;
+ }
ret = __db_duperr(dbp, flags);
goto err;
}
@@ -1208,8 +1847,17 @@ split: needkey = ret = stack = 0;
break;
}
- /* Searching a btree. */
- if ((ret = __bam_c_search(dbc, key,
+ /*
+ * Searching a btree.
+ *
+ * If we've done a split, we can start the search from the
+ * parent of the split page, which __bam_split returned
+ * for us in root_pgno, unless we're in a Btree with record
+ * numbering. In that case, we'll need the true root page
+ * in order to adjust the record count.
+ */
+ if ((ret = __bam_c_search(dbc,
+ F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, key,
flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
goto err;
@@ -1264,8 +1912,8 @@ split: needkey = ret = stack = 0;
*/
for (;; cp->indx += P_INDX) {
if ((ret = __bam_cmp(dbp, data, cp->page,
- cp->indx + O_INDX, dbp->dup_compare, &cmp)) !=0)
- return (ret);
+ cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ goto err;
if (cmp < 0) {
iiop = DB_BEFORE;
break;
@@ -1273,7 +1921,7 @@ split: needkey = ret = stack = 0;
/* Disallow "sorted" duplicate duplicates. */
if (cmp == 0) {
- if (IS_DELETED(cp->page, cp->indx)) {
+ if (IS_DELETED(dbp, cp->page, cp->indx)) {
iiop = DB_CURRENT;
break;
}
@@ -1282,8 +1930,8 @@ split: needkey = ret = stack = 0;
}
if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
- ((PAGE *)cp->page)->inp[cp->indx] !=
- ((PAGE *)cp->page)->inp[cp->indx + P_INDX]) {
+ P_INP(dbp, ((PAGE *)cp->page))[cp->indx] !=
+ P_INP(dbp, ((PAGE *)cp->page))[cp->indx + P_INDX]) {
iiop = DB_AFTER;
break;
}
@@ -1306,7 +1954,7 @@ split: needkey = ret = stack = 0;
flags == DB_BEFORE || flags == DB_CURRENT) {
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp, cp->page, 0, &dbt,
- &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
goto err;
arg = &dbt;
} else
@@ -1327,7 +1975,7 @@ split: needkey = ret = stack = 0;
goto err;
/* Split the tree. */
- if ((ret = __bam_split(dbc, arg)) != 0)
+ if ((ret = __bam_split(dbc, arg, &root_pgno)) != 0)
return (ret);
goto split;
@@ -1361,22 +2009,22 @@ done: /*
* __bam_c_rget --
* Return the record number for a cursor.
*
- * PUBLIC: int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
+ * PUBLIC: int __bam_c_rget __P((DBC *, DBT *));
*/
int
-__bam_c_rget(dbc, data, flags)
+__bam_c_rget(dbc, data)
DBC *dbc;
DBT *data;
- u_int32_t flags;
{
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
db_recno_t recno;
int exact, ret;
- COMPQUIET(flags, 0);
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -1384,24 +2032,24 @@ __bam_c_rget(dbc, data, flags)
* Get a copy of the key.
* Release the page, making sure we don't release it twice.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp, cp->page,
- cp->indx, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ cp->indx, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
goto err;
- ret = memp_fput(dbp->mpf, cp->page, 0);
+ ret = mpf->put(mpf, cp->page, 0);
cp->page = NULL;
if (ret != 0)
return (ret);
- if ((ret = __bam_search(dbc, &dbt,
+ if ((ret = __bam_search(dbc, PGNO_INVALID, &dbt,
F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
1, &recno, &exact)) != 0)
goto err;
- ret = __db_retcopy(dbp, data,
- &recno, sizeof(recno), &dbc->rdata.data, &dbc->rdata.ulen);
+ ret = __db_retcopy(dbp->dbenv, data,
+ &recno, sizeof(recno), &dbc->rdata->data, &dbc->rdata->ulen);
/* Release the stack. */
err: __bam_stkrel(dbc, 0);
@@ -1444,17 +2092,15 @@ __bam_c_first(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
/* Walk down the left-hand side of the tree. */
for (pgno = cp->root;;) {
- ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret);
+ ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
if (ret != 0)
return (ret);
@@ -1462,7 +2108,7 @@ __bam_c_first(dbc)
if (ISLEAF(cp->page))
break;
- pgno = GET_BINTERNAL(cp->page, 0)->pgno;
+ pgno = GET_BINTERNAL(dbc->dbp, cp->page, 0)->pgno;
}
/* If we want a write lock instead of a read lock, get it now. */
@@ -1472,9 +2118,11 @@ __bam_c_first(dbc)
return (ret);
}
+ cp->indx = 0;
+
/* If on an empty page or a deleted record, move to the next one. */
if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))
- if ((ret = __bam_c_next(dbc, 0)) != 0)
+ if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
return (ret);
return (0);
@@ -1489,17 +2137,15 @@ __bam_c_last(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
/* Walk down the right-hand side of the tree. */
for (pgno = cp->root;;) {
- ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret);
+ ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
if (ret != 0)
return (ret);
@@ -1507,8 +2153,8 @@ __bam_c_last(dbc)
if (ISLEAF(cp->page))
break;
- pgno =
- GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno;
+ pgno = GET_BINTERNAL(dbc->dbp, cp->page,
+ NUM_ENT(cp->page) - O_INDX)->pgno;
}
/* If we want a write lock instead of a read lock, get it now. */
@@ -1535,18 +2181,16 @@ __bam_c_last(dbc)
* Move to the next record.
*/
static int
-__bam_c_next(dbc, initial_move)
+__bam_c_next(dbc, initial_move, deleted_okay)
DBC *dbc;
- int initial_move;
+ int initial_move, deleted_okay;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_indx_t adjust;
db_lockmode_t lock_mode;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -1566,7 +2210,7 @@ __bam_c_next(dbc, initial_move)
F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
}
if (cp->page == NULL) {
- ACQUIRE_CUR(dbc, lock_mode, ret);
+ ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
if (ret != 0)
return (ret);
}
@@ -1587,12 +2231,13 @@ __bam_c_next(dbc, initial_move)
= NEXT_PGNO(cp->page)) == PGNO_INVALID)
return (DB_NOTFOUND);
- ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret);
+ ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
if (ret != 0)
return (ret);
+ cp->indx = 0;
continue;
}
- if (IS_CUR_DELETED(dbc)) {
+ if (!deleted_okay && IS_CUR_DELETED(dbc)) {
cp->indx += adjust;
continue;
}
@@ -1610,13 +2255,11 @@ __bam_c_prev(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_indx_t adjust;
db_lockmode_t lock_mode;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -1636,7 +2279,7 @@ __bam_c_prev(dbc)
F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
}
if (cp->page == NULL) {
- ACQUIRE_CUR(dbc, lock_mode, ret);
+ ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
if (ret != 0)
return (ret);
}
@@ -1648,7 +2291,7 @@ __bam_c_prev(dbc)
PREV_PGNO(cp->page)) == PGNO_INVALID)
return (DB_NOTFOUND);
- ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret);
+ ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
if (ret != 0)
return (ret);
@@ -1671,8 +2314,9 @@ __bam_c_prev(dbc)
* Move to a specified record.
*/
static int
-__bam_c_search(dbc, key, flags, exactp)
+__bam_c_search(dbc, root_pgno, key, flags, exactp)
DBC *dbc;
+ db_pgno_t root_pgno;
const DBT *key;
u_int32_t flags;
int *exactp;
@@ -1681,7 +2325,7 @@ __bam_c_search(dbc, key, flags, exactp)
BTREE_CURSOR *cp;
DB *dbp;
PAGE *h;
- db_indx_t indx;
+ db_indx_t indx, *inp;
db_pgno_t bt_lpgno;
db_recno_t recno;
u_int32_t sflags;
@@ -1712,6 +2356,9 @@ __bam_c_search(dbc, key, flags, exactp)
case DB_GET_BOTH:
sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND) | S_EXACT;
goto search;
+ case DB_GET_BOTH_RANGE:
+ sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND);
+ goto search;
case DB_SET_RANGE:
sflags =
(F_ISSET(dbc, DBC_RMW) ? S_WRITE : S_READ) | S_DUPFIRST;
@@ -1758,6 +2405,7 @@ fast_search: /*
if (ret != 0)
goto fast_miss;
+ inp = P_INP(dbp, h);
/*
* It's okay if the page type isn't right or it's empty, it
* just means that the world changed.
@@ -1796,7 +2444,7 @@ fast_search: /*
if (flags == DB_KEYLAST)
goto fast_hit;
for (;
- indx > 0 && h->inp[indx - P_INDX] == h->inp[indx];
+ indx > 0 && inp[indx - P_INDX] == inp[indx];
indx -= P_INDX)
;
goto fast_hit;
@@ -1823,7 +2471,7 @@ try_begin: if (h->prev_pgno == PGNO_INVALID) {
goto fast_hit;
for (;
indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX];
+ inp[indx] == inp[indx + P_INDX];
indx += P_INDX)
;
goto fast_hit;
@@ -1852,8 +2500,8 @@ fast_miss: /*
if (ret != 0)
return (ret);
-search: if ((ret =
- __bam_search(dbc, key, sflags, 1, NULL, exactp)) != 0)
+search: if ((ret = __bam_search(dbc, root_pgno,
+ key, sflags, 1, NULL, exactp)) != 0)
return (ret);
break;
default:
@@ -1870,12 +2518,15 @@ search: if ((ret =
/*
* If we inserted a key into the first or last slot of the tree,
* remember where it was so we can do it more quickly next time.
+ * If there are duplicates and we are inserting into the last slot,
+ * the cursor will point _to_ the last item, not after it, which
+ * is why we subtract P_INDX below.
*/
if (TYPE(cp->page) == P_LBTREE &&
(flags == DB_KEYFIRST || flags == DB_KEYLAST))
t->bt_lpgno =
(NEXT_PGNO(cp->page) == PGNO_INVALID &&
- cp->indx >= NUM_ENT(cp->page)) ||
+ cp->indx >= NUM_ENT(cp->page) - P_INDX) ||
(PREV_PGNO(cp->page) == PGNO_INVALID &&
cp->indx == 0) ? cp->pgno : PGNO_INVALID;
return (0);
@@ -1893,11 +2544,13 @@ __bam_c_physdel(dbc)
DB *dbp;
DBT key;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
int delete_page, empty_page, exact, level, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
delete_page = empty_page = ret = 0;
@@ -1911,7 +2564,7 @@ __bam_c_physdel(dbc)
* space will never be reused unless the exact same key is specified.
*/
if (delete_page &&
- !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_BT_REVSPLIT))
+ !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_AM_REVSPLITOFF))
delete_page = 0;
/*
@@ -1926,11 +2579,17 @@ __bam_c_physdel(dbc)
* To delete a leaf page other than an empty root page, we need a
* copy of a key from the page. Use the 0th page index since it's
* the last key the page held.
+ *
+ * !!!
+ * Note that because __bam_c_physdel is always called from a cursor
+ * close, it should be safe to use the cursor's own "my_rkey" memory
+ * to temporarily hold this key. We shouldn't own any returned-data
+ * memory of interest--if we do, we're in trouble anyway.
*/
if (delete_page) {
memset(&key, 0, sizeof(DBT));
if ((ret = __db_ret(dbp, cp->page,
- 0, &key, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ 0, &key, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
return (ret);
}
@@ -1940,7 +2599,7 @@ __bam_c_physdel(dbc)
* !!!
* The following operations to delete a page may deadlock. The easy
* scenario is if we're deleting an item because we're closing cursors
- * because we've already deadlocked and want to call txn_abort(). If
+ * because we've already deadlocked and want to call txn->abort. If
* we fail due to deadlock, we'll leave a locked, possibly empty page
* in the tree, which won't be empty long because we'll undo the delete
* when we undo the transaction's modifications.
@@ -1977,8 +2636,8 @@ __bam_c_physdel(dbc)
*/
for (level = LEAFLEVEL;; ++level) {
/* Acquire a page and its parent, locked. */
- if ((ret = __bam_search(
- dbc, &key, S_WRPAIR, level, NULL, &exact)) != 0)
+ if ((ret = __bam_search(dbc, PGNO_INVALID,
+ &key, S_WRPAIR, level, NULL, &exact)) != 0)
return (ret);
/*
@@ -2031,19 +2690,19 @@ __bam_c_physdel(dbc)
*/
switch (TYPE(h)) {
case P_IBTREE:
- pgno = GET_BINTERNAL(h, 0)->pgno;
+ pgno = GET_BINTERNAL(dbp, h, 0)->pgno;
break;
case P_IRECNO:
- pgno = GET_RINTERNAL(h, 0)->pgno;
+ pgno = GET_RINTERNAL(dbp, h, 0)->pgno;
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &lock)) != 0)
break;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
break;
BT_STK_PUSH(dbp->dbenv, cp, h, 0, lock, DB_LOCK_WRITE, ret);
if (ret != 0)
@@ -2076,10 +2735,12 @@ __bam_c_getstack(dbc)
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
PAGE *h;
int exact, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -2087,21 +2748,22 @@ __bam_c_getstack(dbc)
* routine has to already hold a read lock on the page, so there
* is no additional lock to acquire.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0)
return (ret);
/* Get a copy of a key from the page. */
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp,
- h, 0, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ h, 0, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
goto err;
/* Get a write-locked stack for the page. */
exact = 0;
- ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact);
+ ret = __bam_search(dbc, PGNO_INVALID,
+ &dbt, S_KEYFIRST, 1, NULL, &exact);
err: /* Discard the key and the page. */
- if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+ if ((t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
return (ret);
@@ -2122,7 +2784,8 @@ __bam_isopd(dbc, pgnop)
if (TYPE(dbc->internal->page) != P_LBTREE)
return (0);
- bo = GET_BOVERFLOW(dbc->internal->page, dbc->internal->indx + O_INDX);
+ bo = GET_BOVERFLOW(dbc->dbp,
+ dbc->internal->page, dbc->internal->indx + O_INDX);
if (B_TYPE(bo->type) == B_DUPLICATE) {
*pgnop = bo->pgno;
return (1);
diff --git a/bdb/btree/bt_delete.c b/bdb/btree/bt_delete.c
index 9725887882a..8c76ead2922 100644
--- a/bdb/btree/bt_delete.c
+++ b/bdb/btree/bt_delete.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic Exp $";
+static const char revid[] = "$Id: bt_delete.c,v 11.44 2002/07/03 19:03:49 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,88 +53,10 @@ static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
-
-/*
- * __bam_delete --
- * Delete the items referenced by a key.
- *
- * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__bam_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- DBC *dbc;
- DBT lkey;
- DBT data;
- u_int32_t f_init, f_next;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
- DB_CHECK_TXN(dbp, txn);
-
- /* Check for invalid flags. */
- if ((ret =
- __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags);
-
- /*
- * Walk a cursor through the key/data pairs, deleting as we go. Set
- * the DB_DBT_USERMEM flag, as this might be a threaded application
- * and the flags checking will catch us. We don't actually want the
- * keys or data, so request a partial of length 0.
- */
- memset(&lkey, 0, sizeof(lkey));
- F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL);
- memset(&data, 0, sizeof(data));
- F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
- /*
- * If locking (and we haven't already acquired CDB locks), set the
- * read-modify-write flag.
- */
- f_init = DB_SET;
- f_next = DB_NEXT_DUP;
- if (STD_LOCKING(dbc)) {
- f_init |= DB_RMW;
- f_next |= DB_RMW;
- }
-
- /* Walk through the set of key/data pairs, deleting as we go. */
- if ((ret = dbc->c_get(dbc, key, &data, f_init)) != 0)
- goto err;
- for (;;) {
- if ((ret = dbc->c_del(dbc, 0)) != 0)
- goto err;
- if ((ret = dbc->c_get(dbc, &lkey, &data, f_next)) != 0) {
- if (ret == DB_NOTFOUND) {
- ret = 0;
- break;
- }
- goto err;
- }
- }
-
-err: /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
/*
* __bam_ditem --
@@ -151,14 +73,18 @@ __bam_ditem(dbc, h, indx)
BINTERNAL *bi;
BKEYDATA *bk;
DB *dbp;
+ DB_MPOOLFILE *mpf;
u_int32_t nbytes;
int ret;
+ db_indx_t *inp;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
switch (B_TYPE(bi->type)) {
case B_DUPLICATE:
case B_KEYDATA:
@@ -171,7 +97,7 @@ __bam_ditem(dbc, h, indx)
return (ret);
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
break;
case P_IRECNO:
@@ -195,7 +121,7 @@ __bam_ditem(dbc, h, indx)
* won't work!
*/
if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
- h->inp[indx] == h->inp[indx + P_INDX])
+ inp[indx] == inp[indx + P_INDX])
return (__bam_adjindx(dbc,
h, indx, indx + O_INDX, 0));
/*
@@ -203,14 +129,14 @@ __bam_ditem(dbc, h, indx)
* doesn't matter if we delete the key item before or
* after the data item for the purposes of this one.
*/
- if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+ if (indx > 0 && inp[indx] == inp[indx - P_INDX])
return (__bam_adjindx(dbc,
h, indx, indx - P_INDX, 0));
}
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
switch (B_TYPE(bk->type)) {
case B_DUPLICATE:
nbytes = BOVERFLOW_SIZE;
@@ -218,24 +144,24 @@ __bam_ditem(dbc, h, indx)
case B_OVERFLOW:
nbytes = BOVERFLOW_SIZE;
if ((ret = __db_doff(
- dbc, (GET_BOVERFLOW(h, indx))->pgno)) != 0)
+ dbc, (GET_BOVERFLOW(dbp, h, indx))->pgno)) != 0)
return (ret);
break;
case B_KEYDATA:
nbytes = BKEYDATA_SIZE(bk->len);
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
/* Delete the item and mark the page dirty. */
if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0)
return (ret);
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
@@ -255,33 +181,37 @@ __bam_adjindx(dbc, h, indx, indx_copy, is_insert)
int is_insert;
{
DB *dbp;
- db_indx_t copy;
+ DB_MPOOLFILE *mpf;
+ db_indx_t copy, *inp;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_adj_log(dbp->dbenv, dbc->txn, &LSN(h),
- 0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy,
- (u_int32_t)is_insert)) != 0)
- return (ret);
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_adj_log(dbp, dbc->txn, &LSN(h), 0,
+ PGNO(h), &LSN(h), indx, indx_copy, (u_int32_t)is_insert)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(h));
/* Shuffle the indices and mark the page dirty. */
if (is_insert) {
- copy = h->inp[indx_copy];
+ copy = inp[indx_copy];
if (indx != NUM_ENT(h))
- memmove(&h->inp[indx + O_INDX], &h->inp[indx],
+ memmove(&inp[indx + O_INDX], &inp[indx],
sizeof(db_indx_t) * (NUM_ENT(h) - indx));
- h->inp[indx] = copy;
+ inp[indx] = copy;
++NUM_ENT(h);
} else {
--NUM_ENT(h);
if (indx != NUM_ENT(h))
- memmove(&h->inp[indx], &h->inp[indx + O_INDX],
+ memmove(&inp[indx], &inp[indx + O_INDX],
sizeof(db_indx_t) * (NUM_ENT(h) - indx));
}
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
@@ -303,6 +233,7 @@ __bam_dpages(dbc, stack_epg)
DB *dbp;
DBT a, b;
DB_LOCK c_lock, p_lock;
+ DB_MPOOLFILE *mpf;
EPG *epg;
PAGE *child, *parent;
db_indx_t nitems;
@@ -311,6 +242,7 @@ __bam_dpages(dbc, stack_epg)
int done, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -328,8 +260,7 @@ __bam_dpages(dbc, stack_epg)
*/
ret = 0;
for (epg = cp->sp; epg < stack_epg; ++epg) {
- if ((t_ret =
- memp_fput(dbp->mpf, epg->page, 0)) != 0 && ret == 0)
+ if ((t_ret = mpf->put(mpf, epg->page, 0)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, epg->lock);
}
@@ -364,7 +295,7 @@ __bam_dpages(dbc, stack_epg)
pgno = PGNO(epg->page);
nitems = NUM_ENT(epg->page);
- if ((ret = memp_fput(dbp->mpf, epg->page, 0)) != 0)
+ if ((ret = mpf->put(mpf, epg->page, 0)) != 0)
goto err_inc;
(void)__TLPUT(dbc, epg->lock);
@@ -394,7 +325,7 @@ __bam_dpages(dbc, stack_epg)
err_inc: ++epg;
err: for (; epg <= cp->csp; ++epg) {
if (epg->page != NULL)
- (void)memp_fput(dbp->mpf, epg->page, 0);
+ (void)mpf->put(mpf, epg->page, 0);
(void)__TLPUT(dbc, epg->lock);
}
BT_STK_CLR(cp);
@@ -415,14 +346,15 @@ err: for (; epg <= cp->csp; ++epg) {
for (done = 0; !done;) {
/* Initialize. */
parent = child = NULL;
- p_lock.off = c_lock.off = LOCK_INVALID;
+ LOCK_INIT(p_lock);
+ LOCK_INIT(c_lock);
/* Lock the root. */
pgno = root_pgno;
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &p_lock)) != 0)
goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &parent)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &parent)) != 0)
goto stop;
if (NUM_ENT(parent) != 1)
@@ -434,7 +366,7 @@ err: for (; epg <= cp->csp; ++epg) {
* If this is overflow, then try to delete it.
* The child may or may not still point at it.
*/
- bi = GET_BINTERNAL(parent, 0);
+ bi = GET_BINTERNAL(dbp, parent, 0);
if (B_TYPE(bi->type) == B_OVERFLOW)
if ((ret = __db_doff(dbc,
((BOVERFLOW *)bi->data)->pgno)) != 0)
@@ -442,7 +374,7 @@ err: for (; epg <= cp->csp; ++epg) {
pgno = bi->pgno;
break;
case P_IRECNO:
- pgno = GET_RINTERNAL(parent, 0)->pgno;
+ pgno = GET_RINTERNAL(dbp, parent, 0)->pgno;
break;
default:
goto stop;
@@ -452,24 +384,24 @@ err: for (; epg <= cp->csp; ++epg) {
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &c_lock)) != 0)
goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &child)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &child)) != 0)
goto stop;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&a, 0, sizeof(a));
a.data = child;
a.size = dbp->pgsize;
memset(&b, 0, sizeof(b));
- b.data = P_ENTRY(parent, 0);
+ b.data = P_ENTRY(dbp, parent, 0);
b.size = TYPE(parent) == P_IRECNO ? RINTERNAL_SIZE :
BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
- if ((ret =
- __bam_rsplit_log(dbp->dbenv, dbc->txn, &child->lsn,
- 0, dbp->log_fileid, PGNO(child), &a, PGNO(parent),
- RE_NREC(parent), &b, &parent->lsn)) != 0)
+ if ((ret = __bam_rsplit_log(dbp, dbc->txn,
+ &child->lsn, 0, PGNO(child), &a, PGNO(parent),
+ RE_NREC(parent), &b, &parent->lsn)) != 0)
goto stop;
- }
+ } else
+ LSN_NOT_LOGGED(child->lsn);
/*
* Make the switch.
@@ -491,9 +423,9 @@ err: for (; epg <= cp->csp; ++epg) {
RE_NREC_SET(parent, rcnt);
/* Mark the pages dirty. */
- if ((ret = memp_fset(dbp->mpf, parent, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, parent, DB_MPOOL_DIRTY)) != 0)
goto stop;
- if ((ret = memp_fset(dbp->mpf, child, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, child, DB_MPOOL_DIRTY)) != 0)
goto stop;
/* Adjust the cursors. */
@@ -514,15 +446,13 @@ err: for (; epg <= cp->csp; ++epg) {
if (0) {
stop: done = 1;
}
- if (p_lock.off != LOCK_INVALID)
- (void)__TLPUT(dbc, p_lock);
+ (void)__TLPUT(dbc, p_lock);
if (parent != NULL &&
- (t_ret = memp_fput(dbp->mpf, parent, 0)) != 0 && ret == 0)
+ (t_ret = mpf->put(mpf, parent, 0)) != 0 && ret == 0)
ret = t_ret;
- if (c_lock.off != LOCK_INVALID)
- (void)__TLPUT(dbc, c_lock);
+ (void)__TLPUT(dbc, c_lock);
if (child != NULL &&
- (t_ret = memp_fput(dbp->mpf, child, 0)) != 0 && ret == 0)
+ (t_ret = mpf->put(mpf, child, 0)) != 0 && ret == 0)
ret = t_ret;
}
diff --git a/bdb/btree/bt_method.c b/bdb/btree/bt_method.c
index 5e3af27d033..aa27ed6bab9 100644
--- a/bdb/btree/bt_method.c
+++ b/bdb/btree/bt_method.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell Exp $";
+static const char revid[] = "$Id: bt_method.c,v 11.29 2002/04/21 13:17:04 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,9 +16,9 @@ static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "qam.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/qam.h"
static int __bam_set_bt_compare
__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
@@ -82,7 +82,8 @@ __bam_db_close(dbp)
{
BTREE *t;
- t = dbp->bt_internal;
+ if ((t = dbp->bt_internal) == NULL)
+ return (0);
/* Recno */
/* Close any backing source file descriptor. */
if (t->re_fp != NULL)
@@ -90,9 +91,9 @@ __bam_db_close(dbp)
/* Free any backing source file name. */
if (t->re_source != NULL)
- __os_freestr(t->re_source);
+ __os_free(dbp->dbenv, t->re_source);
- __os_free(t, sizeof(BTREE));
+ __os_free(dbp->dbenv, t);
dbp->bt_internal = NULL;
return (0);
@@ -127,7 +128,7 @@ __bam_set_flags(dbp, flagsp)
if (LF_ISSET(DB_DUP | DB_DUPSORT)) {
/* DB_DUP/DB_DUPSORT is incompatible with DB_RECNUM. */
- if (F_ISSET(dbp, DB_BT_RECNUM))
+ if (F_ISSET(dbp, DB_AM_RECNUM))
goto incompat;
if (LF_ISSET(DB_DUPSORT)) {
@@ -145,12 +146,12 @@ __bam_set_flags(dbp, flagsp)
if (F_ISSET(dbp, DB_AM_DUP))
goto incompat;
- F_SET(dbp, DB_BT_RECNUM);
+ F_SET(dbp, DB_AM_RECNUM);
LF_CLR(DB_RECNUM);
}
if (LF_ISSET(DB_REVSPLITOFF)) {
- F_SET(dbp, DB_BT_REVSPLIT);
+ F_SET(dbp, DB_AM_REVSPLITOFF);
LF_CLR(DB_REVSPLITOFF);
}
@@ -279,12 +280,12 @@ __ram_set_flags(dbp, flagsp)
DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
if (LF_ISSET(DB_RENUMBER)) {
- F_SET(dbp, DB_RE_RENUMBER);
+ F_SET(dbp, DB_AM_RENUMBER);
LF_CLR(DB_RENUMBER);
}
if (LF_ISSET(DB_SNAPSHOT)) {
- F_SET(dbp, DB_RE_SNAPSHOT);
+ F_SET(dbp, DB_AM_SNAPSHOT);
LF_CLR(DB_SNAPSHOT);
}
@@ -310,7 +311,7 @@ __ram_set_re_delim(dbp, re_delim)
t = dbp->bt_internal;
t->re_delim = re_delim;
- F_SET(dbp, DB_RE_DELIMITER);
+ F_SET(dbp, DB_AM_DELIMITER);
return (0);
}
@@ -336,7 +337,7 @@ __ram_set_re_len(dbp, re_len)
q = dbp->q_internal;
q->re_len = re_len;
- F_SET(dbp, DB_RE_FIXEDLEN);
+ F_SET(dbp, DB_AM_FIXEDLEN);
return (0);
}
@@ -362,7 +363,7 @@ __ram_set_re_pad(dbp, re_pad)
q = dbp->q_internal;
q->re_pad = re_pad;
- F_SET(dbp, DB_RE_PAD);
+ F_SET(dbp, DB_AM_PAD);
return (0);
}
diff --git a/bdb/btree/bt_open.c b/bdb/btree/bt_open.c
index 405c1880f5e..0b72391c267 100644
--- a/bdb/btree/bt_open.c
+++ b/bdb/btree/bt_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Exp $";
+static const char revid[] = "$Id: bt_open.c,v 11.76 2002/09/04 19:06:42 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -54,33 +54,38 @@ static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Ex
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "log.h"
-#include "mp.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/fop.h"
+
+static void __bam_init_meta __P((DB *, BTMETA *, db_pgno_t, DB_LSN *));
/*
* __bam_open --
* Open a btree.
*
- * PUBLIC: int __bam_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
*/
int
-__bam_open(dbp, name, base_pgno, flags)
+__bam_open(dbp, txn, name, base_pgno, flags)
DB *dbp;
+ DB_TXN *txn;
const char *name;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTREE *t;
+ COMPQUIET(name, NULL);
t = dbp->bt_internal;
/* Initialize the remaining fields/methods of the DB. */
- dbp->del = __bam_delete;
dbp->key_range = __bam_key_range;
dbp->stat = __bam_stat;
@@ -99,8 +104,8 @@ __bam_open(dbp, name, base_pgno, flags)
* Verify that the bt_minkey value specified won't cause the
* calculation of ovflsize to underflow [#2406] for this pagesize.
*/
- if (B_MINKEY_TO_OVFLSIZE(t->bt_minkey, dbp->pgsize) >
- B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) {
+ if (B_MINKEY_TO_OVFLSIZE(dbp, t->bt_minkey, dbp->pgsize) >
+ B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
__db_err(dbp->dbenv,
"bt_minkey value of %lu too high for page size of %lu",
(u_long)t->bt_minkey, (u_long)dbp->pgsize);
@@ -108,7 +113,7 @@ __bam_open(dbp, name, base_pgno, flags)
}
/* Start up the tree. */
- return (__bam_read_root(dbp, name, base_pgno, flags));
+ return (__bam_read_root(dbp, txn, base_pgno, flags));
}
/*
@@ -143,6 +148,7 @@ __bam_metachk(dbp, name, btm)
name, (u_long)vers);
return (DB_OLD_VERSION);
case 8:
+ case 9:
break;
default:
__db_err(dbenv,
@@ -187,13 +193,13 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) {
if (dbp->type != DB_BTREE)
goto wrong_type;
- F_SET(dbp, DB_BT_RECNUM);
+ F_SET(dbp, DB_AM_RECNUM);
if ((ret = __db_fcchk(dbenv,
- "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0)
+ "DB->open", dbp->flags, DB_AM_DUP, DB_AM_RECNUM)) != 0)
return (ret);
} else
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
+ if (F_ISSET(dbp, DB_AM_RECNUM)) {
__db_err(dbenv,
"%s: DB_RECNUM specified to open method but not set in database",
name);
@@ -203,9 +209,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_FIXEDLEN);
+ F_SET(dbp, DB_AM_FIXEDLEN);
} else
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
__db_err(dbenv,
"%s: DB_FIXEDLEN specified to open method but not set in database",
name);
@@ -215,9 +221,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_RENUMBER);
+ F_SET(dbp, DB_AM_RENUMBER);
} else
- if (F_ISSET(dbp, DB_RE_RENUMBER)) {
+ if (F_ISSET(dbp, DB_AM_RENUMBER)) {
__db_err(dbenv,
"%s: DB_RENUMBER specified to open method but not set in database",
name);
@@ -266,116 +272,129 @@ wrong_type:
/*
* __bam_read_root --
- * Check (and optionally create) a tree.
+ * Read the root page and check a tree.
*
- * PUBLIC: int __bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_read_root __P((DB *, DB_TXN *, db_pgno_t, u_int32_t));
*/
int
-__bam_read_root(dbp, name, base_pgno, flags)
+__bam_read_root(dbp, txn, base_pgno, flags)
DB *dbp;
- const char *name;
+ DB_TXN *txn;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTMETA *meta;
BTREE *t;
DBC *dbc;
- DB_LSN orig_lsn;
DB_LOCK metalock;
- PAGE *root;
- int locked, ret, t_ret;
+ DB_MPOOLFILE *mpf;
+ int ret, t_ret;
- ret = 0;
- t = dbp->bt_internal;
meta = NULL;
- root = NULL;
- locked = 0;
+ t = dbp->bt_internal;
+ LOCK_INIT(metalock);
+ mpf = dbp->mpf;
+ ret = 0;
- /*
- * Get a cursor. If DB_CREATE is specified, we may be creating
- * the root page, and to do that safely in CDB we need a write
- * cursor. In STD_LOCKING mode, we'll synchronize using the
- * meta page lock instead.
- */
- if ((ret = dbp->cursor(dbp, dbp->open_txn,
- &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbp->dbenv) ?
- DB_WRITECURSOR : 0)) != 0)
+ /* Get a cursor. */
+ if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
return (ret);
- /* Get, and optionally create the metadata page. */
+ /* Get the metadata page. */
if ((ret =
__db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(
- dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0)
+ if ((ret = mpf->get(mpf, &base_pgno, 0, (PAGE **)&meta)) != 0)
goto err;
/*
- * If the magic number is correct, we're not creating the tree.
- * Correct any fields that may not be right. Note, all of the
- * local flags were set by DB->open.
+ * If the magic number is set, the tree has been created. Correct
+ * any fields that may not be right. Note, all of the local flags
+ * were set by DB->open.
+ *
+ * Otherwise, we'd better be in recovery or abort, in which case the
+ * metadata page will be created/initialized elsewhere.
*/
-again: if (meta->dbmeta.magic != 0) {
- t->bt_maxkey = meta->maxkey;
- t->bt_minkey = meta->minkey;
- t->re_pad = meta->re_pad;
- t->re_len = meta->re_len;
-
- t->bt_meta = base_pgno;
- t->bt_root = meta->root;
-
- (void)memp_fput(dbp->mpf, meta, 0);
- meta = NULL;
- goto done;
- }
+ DB_ASSERT(meta->dbmeta.magic != 0 ||
+ IS_RECOVERING(dbp->dbenv) || F_ISSET(dbp, DB_AM_RECOVER));
- /* In recovery if it's not there it will be created elsewhere.*/
- if (IS_RECOVERING(dbp->dbenv))
- goto done;
-
- /* If we're doing CDB; we now have to get the write lock. */
- if (CDB_LOCKING(dbp->dbenv)) {
- /*
- * We'd better have DB_CREATE set if we're actually doing
- * the create.
- */
- DB_ASSERT(LF_ISSET(DB_CREATE));
- if ((ret = lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
- &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
- goto err;
- }
+ t->bt_maxkey = meta->maxkey;
+ t->bt_minkey = meta->minkey;
+ t->re_pad = meta->re_pad;
+ t->re_len = meta->re_len;
+
+ t->bt_meta = base_pgno;
+ t->bt_root = meta->root;
/*
- * If we are doing locking, relase the read lock and get a write lock.
- * We want to avoid deadlock.
+ * !!!
+ * If creating a subdatabase, we've already done an insert when
+ * we put the subdatabase's entry into the master database, so
+ * our last-page-inserted value is wrongly initialized for the
+ * master database, not the subdatabase we're creating. I'm not
+ * sure where the *right* place to clear this value is, it's not
+ * intuitively obvious that it belongs here.
*/
- if (locked == 0 && STD_LOCKING(dbc)) {
- if ((ret = __LPUT(dbc, metalock)) != 0)
- goto err;
- if ((ret = __db_lget(dbc,
- 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- locked = 1;
- goto again;
- }
+ t->bt_lpgno = PGNO_INVALID;
+
+ /* We must initialize last_pgno, it could be stale. */
+ if (!LF_ISSET(DB_RDONLY) && dbp->meta_pgno == PGNO_BASE_MD) {
+ mpf->last_pgno(mpf, &meta->dbmeta.last_pgno);
+ ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY);
+ } else
+ ret = mpf->put(mpf, meta, 0);
+ meta = NULL;
+
+err: /* Put the metadata page back. */
+ if (meta != NULL && (t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __bam_init_meta --
+ *
+ * Initialize a btree meta-data page. The following fields may need
+ * to be updated later: last_pgno, root.
+ */
+static void
+__bam_init_meta(dbp, meta, pgno, lsnp)
+ DB *dbp;
+ BTMETA *meta;
+ db_pgno_t pgno;
+ DB_LSN *lsnp;
+{
+ BTREE *t;
- /* Initialize the tree structure metadata information. */
- orig_lsn = meta->dbmeta.lsn;
memset(meta, 0, sizeof(BTMETA));
- meta->dbmeta.lsn = orig_lsn;
- meta->dbmeta.pgno = base_pgno;
+ meta->dbmeta.lsn = *lsnp;
+ meta->dbmeta.pgno = pgno;
meta->dbmeta.magic = DB_BTREEMAGIC;
meta->dbmeta.version = DB_BTREEVERSION;
meta->dbmeta.pagesize = dbp->pgsize;
+ if (F_ISSET(dbp, DB_AM_CHKSUM))
+ FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM);
+ if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
+ meta->dbmeta.encrypt_alg =
+ ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg;
+ DB_ASSERT(meta->dbmeta.encrypt_alg != 0);
+ meta->crypto_magic = meta->dbmeta.magic;
+ }
meta->dbmeta.type = P_BTREEMETA;
meta->dbmeta.free = PGNO_INVALID;
+ meta->dbmeta.last_pgno = pgno;
if (F_ISSET(dbp, DB_AM_DUP))
F_SET(&meta->dbmeta, BTM_DUP);
- if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN))
F_SET(&meta->dbmeta, BTM_FIXEDLEN);
- if (F_ISSET(dbp, DB_BT_RECNUM))
+ if (F_ISSET(dbp, DB_AM_RECNUM))
F_SET(&meta->dbmeta, BTM_RECNUM);
- if (F_ISSET(dbp, DB_RE_RENUMBER))
+ if (F_ISSET(dbp, DB_AM_RENUMBER))
F_SET(&meta->dbmeta, BTM_RENUMBER);
if (F_ISSET(dbp, DB_AM_SUBDB))
F_SET(&meta->dbmeta, BTM_SUBDB);
@@ -385,14 +404,165 @@ again: if (meta->dbmeta.magic != 0) {
F_SET(&meta->dbmeta, BTM_RECNO);
memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+ t = dbp->bt_internal;
meta->maxkey = t->bt_maxkey;
meta->minkey = t->bt_minkey;
meta->re_len = t->re_len;
meta->re_pad = t->re_pad;
+}
- /* If necessary, log the meta-data and root page creates. */
- if ((ret = __db_log_page(dbp,
- name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0)
+/*
+ * __bam_new_file --
+ *	Create the necessary pages to begin a new database file.
+ *
+ * Two pages are built: the meta-data page (PGNO_BASE_MD) and an empty leaf
+ * root page (page 1).  There are two cases, selected by "name":
+ *
+ *	name == NULL	(unnamed) each page is obtained with mpf->get using
+ *			DB_MPOOL_CREATE and returned with mpf->put marked
+ *			DB_MPOOL_DIRTY.
+ *	name != NULL	(named) a single page-sized buffer is allocated, and
+ *			each page in turn is built in it, converted with
+ *			__db_pgout and written with __fop_write.
+ *
+ * The way to read the code is that for each page being created, there are
+ * three parts: 1) a "get page" chunk, 2) the initialization, and 3) the
+ * "put page" chunk.
+ *
+ * Returns 0 on success, otherwise the error returned by the call that failed.
+ *
+ * PUBLIC: int __bam_new_file __P((DB *, DB_TXN *, DB_FH *, const char *));
+ */
+int
+__bam_new_file(dbp, txn, fhp, name)
+	DB *dbp;
+	DB_TXN *txn;
+	DB_FH *fhp;
+	const char *name;
+{
+	BTMETA *meta;
+	DB_ENV *dbenv;
+	DB_LSN lsn;
+	DB_MPOOLFILE *mpf;
+	DB_PGINFO pginfo;
+	DBT pdbt;
+	PAGE *root;
+	db_pgno_t pgno;
+	int ret;
+	void *buf;
+
+	dbenv = dbp->dbenv;
+	mpf = dbp->mpf;
+	root = NULL;
+	meta = NULL;
+	memset(&pdbt, 0, sizeof(pdbt));
+
+	/* Build meta-data page. */
+
+	if (name == NULL) {
+		pgno = PGNO_BASE_MD;
+		ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &meta);
+	} else {
+		/*
+		 * The page-info DBT is handed to __db_pgout below for both
+		 * the meta-data page and the root page.
+		 */
+		pginfo.db_pagesize = dbp->pgsize;
+		pginfo.flags =
+		    F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
+		pginfo.type = dbp->type;
+		pdbt.data = &pginfo;
+		pdbt.size = sizeof(pginfo);
+		ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf);
+		meta = (BTMETA *)buf;
+	}
+	if (ret != 0)
+		return (ret);
+
+	LSN_NOT_LOGGED(lsn);
+	__bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
+	meta->root = 1;
+	meta->dbmeta.last_pgno = 1;
+
+	if (name == NULL)
+		ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY);
+	else {
+		if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0)
+			goto err;
+		ret = __fop_write(dbenv,
+		    txn, name, DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1);
+	}
+	if (ret != 0)
+		goto err;
+	meta = NULL;
+
+	/* Now build root page. */
+	if (name == NULL) {
+		pgno = 1;
+		if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0)
+			goto err;
+	} else {
+#ifdef DIAGNOSTIC
+		/*
+		 * The buffer still holds the meta-data page image; clear it
+		 * before reusing it for the root page.  memset takes the fill
+		 * value second and the length third -- with the two swapped
+		 * the call clears zero bytes and is a no-op.
+		 */
+		memset(buf, 0, dbp->pgsize);
+#endif
+		root = (PAGE *)buf;
+	}
+
+	P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID,
+	    LEAFLEVEL, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
+	LSN_NOT_LOGGED(root->lsn);
+
+	if (name == NULL)
+		ret = mpf->put(mpf, root, DB_MPOOL_DIRTY);
+	else {
+		if ((ret = __db_pgout(dbenv, root->pgno, root, &pdbt)) != 0)
+			goto err;
+		ret = __fop_write(dbenv, txn,
+		    name, DB_APP_DATA, fhp, dbp->pgsize, buf, dbp->pgsize, 1);
+	}
+	if (ret != 0)
+		goto err;
+	root = NULL;
+
+	/*
+	 * Success falls through to the cleanup below.  In the named case
+	 * meta and root both alias buf, so freeing buf releases them; in the
+	 * unnamed case any page still held is returned to the pool.
+	 */
+err:	if (name != NULL)
+		__os_free(dbenv, buf);
+	else {
+		if (meta != NULL)
+			(void)mpf->put(mpf, meta, 0);
+		if (root != NULL)
+			(void)mpf->put(mpf, root, 0);
+	}
+	return (ret);
+}
+
+/*
+ * __bam_new_subdb --
+ * Create a metadata page and a root page for a new btree.
+ *
+ * PUBLIC: int __bam_new_subdb __P((DB *, DB *, DB_TXN *));
+ */
+int
+__bam_new_subdb(mdbp, dbp, txn)
+ DB *mdbp, *dbp;
+ DB_TXN *txn;
+{
+ BTMETA *meta;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LOCK metalock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ PAGE *root;
+ int ret, t_ret;
+
+ dbenv = mdbp->dbenv;
+ mpf = mdbp->mpf;
+ dbc = NULL;
+ meta = NULL;
+ root = NULL;
+
+ if ((ret = mdbp->cursor(mdbp, txn,
+ &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0)
+ return (ret);
+
+ /* Get, and optionally create the metadata page. */
+ if ((ret = __db_lget(dbc,
+ 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ goto err;
+ if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0)
+ goto err;
+
+ /* Build meta-data page. */
+ lsn = meta->dbmeta.lsn;
+ __bam_init_meta(dbp, meta, dbp->meta_pgno, &lsn);
+ if ((ret = __db_log_page(mdbp,
+ txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0)
goto err;
/* Create and initialize a root page. */
@@ -401,68 +571,35 @@ again: if (meta->dbmeta.magic != 0) {
goto err;
root->level = LEAFLEVEL;
- if (dbp->open_txn != NULL && (ret = __bam_root_log(dbp->dbenv,
- dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid,
+ if (DBENV_LOGGING(dbenv) &&
+ (ret = __bam_root_log(mdbp, txn, &meta->dbmeta.lsn, 0,
meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0)
goto err;
meta->root = root->pgno;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
- if ((ret = __db_log_page(dbp,
- name, &root->lsn, root->pgno, root)) != 0)
+ if ((ret =
+ __db_log_page(mdbp, txn, &root->lsn, root->pgno, root)) != 0)
goto err;
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
-
- t->bt_meta = base_pgno;
- t->bt_root = root->pgno;
/* Release the metadata and root pages. */
- if ((ret = memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0)
goto err;
meta = NULL;
- if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, root, DB_MPOOL_DIRTY)) != 0)
goto err;
root = NULL;
-
- /*
- * Flush the metadata and root pages to disk.
- *
- * !!!
- * It's not useful to return not-yet-flushed here -- convert it to
- * an error.
- */
- if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) {
- __db_err(dbp->dbenv, "Metapage flush failed");
- ret = EINVAL;
- }
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
-
-done: /*
- * !!!
- * We already did an insert and so the last-page-inserted has been
- * set. I'm not sure where the *right* place to clear this value
- * is, it's not intuitively obvious that it belongs here.
- */
- t->bt_lpgno = PGNO_INVALID;
-
err:
-DB_TEST_RECOVERY_LABEL
- /* Put any remaining pages back. */
if (meta != NULL)
- if ((t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0)
ret = t_ret;
if (root != NULL)
- if ((t_ret = memp_fput(dbp->mpf, root, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = mpf->put(mpf, root, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (LOCK_ISSET(metalock))
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (dbc != NULL)
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
-
- /* We can release the metapage lock when we are done. */
- if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
return (ret);
}
diff --git a/bdb/btree/bt_put.c b/bdb/btree/bt_put.c
index 19a04526d1b..39bd2024e76 100644
--- a/bdb/btree/bt_put.c
+++ b/bdb/btree/bt_put.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Exp $";
+static const char revid[] = "$Id: bt_put.c,v 11.69 2002/08/06 06:11:12 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,12 +53,16 @@ static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Ex
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+static int __bam_build
+ __P((DBC *, u_int32_t, DBT *, PAGE *, u_int32_t, u_int32_t));
static int __bam_dup_convert __P((DBC *, PAGE *, u_int32_t));
static int __bam_ovput
__P((DBC *, u_int32_t, db_pgno_t, PAGE *, u_int32_t, DBT *));
+static u_int32_t
+ __bam_partsize __P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t));
/*
* __bam_iitem --
@@ -77,6 +81,7 @@ __bam_iitem(dbc, key, data, op, flags)
BTREE_CURSOR *cp;
DB *dbp;
DBT bk_hdr, tdbt;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_indx_t indx;
u_int32_t data_size, have_bytes, need_bytes, needed;
@@ -85,6 +90,7 @@ __bam_iitem(dbc, key, data, op, flags)
COMPQUIET(bk, NULL);
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
h = cp->page;
@@ -95,7 +101,7 @@ __bam_iitem(dbc, key, data, op, flags)
* Fixed-length records with partial puts: it's an error to specify
* anything other simple overwrite.
*/
- if (F_ISSET(dbp, DB_RE_FIXEDLEN) &&
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN) &&
F_ISSET(data, DB_DBT_PARTIAL) && data->dlen != data->size) {
data_size = data->size;
goto len_err;
@@ -110,16 +116,18 @@ __bam_iitem(dbc, key, data, op, flags)
* the fixed-length record size.
*/
data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
- __bam_partsize(op, data, h, indx) : data->size;
+ __bam_partsize(dbp, op, data, h, indx) : data->size;
padrec = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
if (data_size > t->re_len) {
len_err: __db_err(dbp->dbenv,
"Length improper for fixed length record %lu",
(u_long)data_size);
return (EINVAL);
}
- if (data_size < t->re_len) {
+
+ /* Records that are deleted anyway needn't be padded out. */
+ if (!LF_ISSET(BI_DELETED) && data_size < t->re_len) {
padrec = 1;
data_size = t->re_len;
}
@@ -146,8 +154,8 @@ len_err: __db_err(dbp->dbenv,
*/
if (op == DB_CURRENT && dbp->dup_compare != NULL) {
if ((ret = __bam_cmp(dbp, data, h,
- indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
- dbp->dup_compare, &cmp)) != 0)
+ indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
+ dbp->dup_compare, &cmp)) != 0)
return (ret);
if (cmp != 0) {
__db_err(dbp->dbenv,
@@ -190,7 +198,7 @@ len_err: __db_err(dbp->dbenv,
*/
bigkey = 0;
if (op == DB_CURRENT) {
- bk = GET_BKEYDATA(h,
+ bk = GET_BKEYDATA(dbp, h,
indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
if (B_TYPE(bk->type) == B_KEYDATA)
have_bytes = BKEYDATA_PSIZE(bk->len);
@@ -221,7 +229,7 @@ len_err: __db_err(dbp->dbenv,
* The t->bt_maxkey test here may be insufficient -- do we have to
* check in the btree split code, so we don't undo it there!?!?
*/
- if (P_FREESPACE(h) < needed ||
+ if (P_FREESPACE(dbp, h) < needed ||
(t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
return (DB_NEEDSPLIT);
@@ -328,6 +336,11 @@ len_err: __db_err(dbp->dbenv,
/* Add the data. */
if (bigdata) {
+ /*
+ * We do not have to handle deleted (BI_DELETED) records
+ * in this case; the actual records should never be created.
+ */
+ DB_ASSERT(!LF_ISSET(BI_DELETED));
if ((ret = __bam_ovput(dbc,
B_OVERFLOW, PGNO_INVALID, h, indx, data)) != 0)
return (ret);
@@ -347,7 +360,7 @@ len_err: __db_err(dbp->dbenv,
if (ret != 0)
return (ret);
}
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
/*
@@ -375,7 +388,7 @@ len_err: __db_err(dbp->dbenv,
* up at least 25% of the space on the page. If it does, move it onto
* its own page.
*/
- if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
+ if (dupadjust && P_FREESPACE(dbp, h) <= dbp->pgsize / 2) {
if ((ret = __bam_dup_convert(dbc, h, indx - O_INDX)) != 0)
return (ret);
}
@@ -390,11 +403,10 @@ len_err: __db_err(dbp->dbenv,
/*
* __bam_partsize --
* Figure out how much space a partial data item is in total.
- *
- * PUBLIC: u_int32_t __bam_partsize __P((u_int32_t, DBT *, PAGE *, u_int32_t));
*/
-u_int32_t
-__bam_partsize(op, data, h, indx)
+static u_int32_t
+__bam_partsize(dbp, op, data, h, indx)
+ DB *dbp;
u_int32_t op, indx;
DBT *data;
PAGE *h;
@@ -413,38 +425,18 @@ __bam_partsize(op, data, h, indx)
* Otherwise, it's the data provided plus any already existing data
* that we're not replacing.
*/
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
nbytes =
B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
- /*
- * There are really two cases here:
- *
- * Case 1: We are replacing some bytes that do not exist (i.e., they
- * are past the end of the record). In this case the number of bytes
- * we are replacing is irrelevant and all we care about is how many
- * bytes we are going to add from offset. So, the new record length
- * is going to be the size of the new bytes (size) plus wherever those
- * new bytes begin (doff).
- *
- * Case 2: All the bytes we are replacing exist. Therefore, the new
- * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
- * plus the bytes we are adding (size).
- */
- if (nbytes < data->doff + data->dlen) /* Case 1 */
- return (data->doff + data->size);
-
- return (nbytes + data->size - data->dlen); /* Case 2 */
+ return (__db_partsize(nbytes, data));
}
/*
* __bam_build --
* Build the real record for a partial put, or short fixed-length record.
- *
- * PUBLIC: int __bam_build __P((DBC *, u_int32_t,
- * PUBLIC: DBT *, PAGE *, u_int32_t, u_int32_t));
*/
-int
+static int
__bam_build(dbc, op, dbt, h, indx, nbytes)
DBC *dbc;
u_int32_t op, indx, nbytes;
@@ -454,9 +446,8 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
BTREE *t;
- BTREE_CURSOR *cp;
DB *dbp;
- DBT copy;
+ DBT copy, *rdata;
u_int32_t len, tlen;
u_int8_t *p;
int ret;
@@ -464,26 +455,26 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
COMPQUIET(bo, NULL);
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *) dbc->internal;
t = dbp->bt_internal;
/* We use the record data return memory, it's only a short-term use. */
- if (dbc->rdata.ulen < nbytes) {
+ rdata = &dbc->my_rdata;
+ if (rdata->ulen < nbytes) {
if ((ret = __os_realloc(dbp->dbenv,
- nbytes, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ nbytes, &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
}
- dbc->rdata.ulen = nbytes;
+ rdata->ulen = nbytes;
}
/*
* We use nul or pad bytes for any part of the record that isn't
* specified; get it over with.
*/
- memset(dbc->rdata.data,
- F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_pad : 0, nbytes);
+ memset(rdata->data,
+ F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_pad : 0, nbytes);
/*
* In the next clauses, we need to do three things: a) set p to point
@@ -495,14 +486,15 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
* the chase.
*/
if (!F_ISSET(dbt, DB_DBT_PARTIAL) || op != DB_CURRENT) {
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
tlen = dbt->doff;
goto user_copy;
}
/* Find the current record. */
if (indx < NUM_ENT(h)) {
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ?
+ O_INDX : 0));
bo = (BOVERFLOW *)bk;
} else {
bk = &tbk;
@@ -516,12 +508,12 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
*/
memset(&copy, 0, sizeof(copy));
if ((ret = __db_goff(dbp, &copy, bo->tlen,
- bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
+ bo->pgno, &rdata->data, &rdata->ulen)) != 0)
return (ret);
/* Skip any leading data from the original record. */
tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
/*
* Copy in any trailing data from the original record.
@@ -542,10 +534,10 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
}
} else {
/* Copy in any leading data from the original record. */
- memcpy(dbc->rdata.data,
+ memcpy(rdata->data,
bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
/* Copy in any trailing data from the original record. */
len = dbt->doff + dbt->dlen;
@@ -564,11 +556,11 @@ user_copy:
tlen += dbt->size;
/* Set the DBT to reference our new record. */
- dbc->rdata.size = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : tlen;
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- *dbt = dbc->rdata;
+ rdata->size = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : tlen;
+ rdata->dlen = 0;
+ rdata->doff = 0;
+ rdata->flags = 0;
+ *dbt = *rdata;
return (0);
}
@@ -591,6 +583,7 @@ __bam_ritem(dbc, h, indx, data)
db_indx_t cnt, lo, ln, min, off, prefix, suffix;
int32_t nbytes;
int ret;
+ db_indx_t *inp;
u_int8_t *p, *t;
dbp = dbc->dbp;
@@ -600,10 +593,10 @@ __bam_ritem(dbc, h, indx, data)
* to insert and whether it fits is handled in the caller. All we do
* here is manage the page shuffling.
*/
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
/*
* We might as well check to see if the two data items share
* a common prefix and suffix -- it can save us a lot of log
@@ -627,17 +620,18 @@ __bam_ritem(dbc, h, indx, data)
orig.size = bk->len - (prefix + suffix);
repl.data = (u_int8_t *)data->data + prefix;
repl.size = data->size - (prefix + suffix);
- if ((ret = __bam_repl_log(dbp->dbenv, dbc->txn,
- &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
- (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
+ if ((ret = __bam_repl_log(dbp, dbc->txn, &LSN(h), 0, PGNO(h),
+ &LSN(h), (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
&orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
return (ret);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(h));
/*
* Set references to the first in-use byte on the page and the
* first byte of the item being replaced.
*/
+ inp = P_INP(dbp, h);
p = (u_int8_t *)h + HOFFSET(h);
t = (u_int8_t *)bk;
@@ -648,19 +642,19 @@ __bam_ritem(dbc, h, indx, data)
* the regions overlap.
*/
lo = BKEYDATA_SIZE(bk->len);
- ln = BKEYDATA_SIZE(data->size);
+ ln = (db_indx_t)BKEYDATA_SIZE(data->size);
if (lo != ln) {
nbytes = lo - ln; /* Signed difference. */
if (p == t) /* First index is fast. */
- h->inp[indx] += nbytes;
+ inp[indx] += nbytes;
else { /* Else, shift the page. */
memmove(p + nbytes, p, t - p);
/* Adjust the indices' offsets. */
- off = h->inp[indx];
+ off = inp[indx];
for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
- if (h->inp[cnt] <= off)
- h->inp[cnt] += nbytes;
+ if (inp[cnt] <= off)
+ inp[cnt] += nbytes;
}
/* Clean up the page and adjust the item's reference. */
@@ -688,30 +682,31 @@ __bam_dup_convert(dbc, h, indx)
PAGE *h;
u_int32_t indx;
{
- BTREE_CURSOR *cp;
BKEYDATA *bk;
DB *dbp;
DBT hdr;
+ DB_MPOOLFILE *mpf;
PAGE *dp;
- db_indx_t cnt, cpindx, dindx, first, sz;
+ db_indx_t cnt, cpindx, dindx, first, *inp, sz;
int ret;
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *)dbc->internal;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
/*
* Count the duplicate records and calculate how much room they're
* using on the page.
*/
- while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+ while (indx > 0 && inp[indx] == inp[indx - P_INDX])
indx -= P_INDX;
for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
- if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
+ if (indx >= NUM_ENT(h) || inp[first] != inp[indx])
break;
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
sz += B_TYPE(bk->type) == B_KEYDATA ?
BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
- bk = GET_BKEYDATA(h, indx + O_INDX);
+ bk = GET_BKEYDATA(dbp, h, indx + O_INDX);
sz += B_TYPE(bk->type) == B_KEYDATA ?
BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
}
@@ -766,7 +761,7 @@ __bam_dup_convert(dbc, h, indx)
* deleted entries are discarded (if the deleted entry is
* overflow, then free up those pages).
*/
- bk = GET_BKEYDATA(h, dindx + 1);
+ bk = GET_BKEYDATA(dbp, h, dindx + 1);
hdr.data = bk;
hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
@@ -778,7 +773,7 @@ __bam_dup_convert(dbc, h, indx)
*/
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_doff(dbc,
- (GET_BOVERFLOW(h, dindx + 1))->pgno)) != 0)
+ (GET_BOVERFLOW(dbp, h, dindx + 1))->pgno)) != 0)
goto err;
} else {
if ((ret = __db_pitem(
@@ -802,7 +797,7 @@ __bam_dup_convert(dbc, h, indx)
/* Put in a new data item that points to the duplicates page. */
if ((ret = __bam_ovput(dbc,
- B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0)
+ B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0)
goto err;
/* Adjust cursors for all the above movments. */
@@ -810,9 +805,9 @@ __bam_dup_convert(dbc, h, indx)
PGNO(h), first + P_INDX, first + P_INDX - indx)) != 0)
goto err;
- return (memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY));
+ return (mpf->put(mpf, dp, DB_MPOOL_DIRTY));
-err: (void)__db_free(dbc, dp);
+err: (void)mpf->put(mpf, dp, 0);
return (ret);
}
diff --git a/bdb/btree/bt_rec.c b/bdb/btree/bt_rec.c
index 24dc9bc6a6e..b6443547aa5 100644
--- a/bdb/btree/bt_rec.c
+++ b/bdb/btree/bt_rec.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp $";
+static const char revid[] = "$Id: bt_rec.c,v 11.57 2002/08/06 16:53:53 ubell Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,287 +18,17 @@ static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
-#include "btree.h"
-#include "log.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
#define IS_BTREE_PAGE(pagep) \
(TYPE(pagep) == P_IBTREE || \
TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP)
/*
- * __bam_pg_alloc_recover --
- * Recovery function for pg_alloc.
- *
- * PUBLIC: int __bam_pg_alloc_recover
- * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__bam_pg_alloc_recover(dbenv, dbtp, lsnp, op, info)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __bam_pg_alloc_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, level, modified, ret;
-
- REC_PRINT(__bam_pg_alloc_print);
- REC_INTRO(__bam_pg_alloc_read, 0);
-
- /*
- * Fix up the allocated page. If we're redoing the operation, we have
- * to get the page (creating it if it doesn't exist), and update its
- * LSN. If we're undoing the operation, we have to reset the page's
- * LSN and put it on the free list.
- *
- * Fix up the metadata page. If we're redoing the operation, we have
- * to get the metadata page and update its LSN and its free pointer.
- * If we're undoing the operation and the page was ever created, we put
- * it on the freelist.
- */
- pgno = PGNO_BASE_MD;
- meta = NULL;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist on redo. */
- if (DB_REDO(op)) {
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- } else
- goto done;
- }
- if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
- /*
- * We specify creation and check for it later, because this
- * operation was supposed to create the page, and even in
- * the undo case it's going to get linked onto the freelist
- * which we're also fixing up.
- */
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto err;
- }
-
- /* Fix up the allocated page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
-
- /*
- * If an inital allocation is aborted and then reallocated
- * during an archival restore the log record will have
- * an LSN for the page but the page will be empty.
- */
- if (IS_ZERO_LSN(LSN(pagep)))
- cmp_p = 0;
- CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->page_lsn);
- /*
- * If we we rolled back this allocation previously during an
- * archive restore, the page may have the LSN of the meta page
- * at the point of the roll back. This will be no more
- * than the LSN of the metadata page at the time of this allocation.
- */
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(argp->page_lsn) &&
- log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo update described. */
- switch (argp->ptype) {
- case P_LBTREE:
- case P_LRECNO:
- case P_LDUP:
- level = LEAFLEVEL;
- break;
- default:
- level = 0;
- break;
- }
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
-
- pagep->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /*
- * Undo the allocation, reinitialize the page and
- * link its next pointer to the free list.
- */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
-
- pagep->lsn = argp->page_lsn;
- modified = 1;
- }
-
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- goto err;
- }
-
- /*
- * If the page was newly created, put it on the limbo list.
- */
- if (IS_ZERO_LSN(LSN(pagep)) &&
- IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
- /* Put the page in limbo.*/
- if ((ret = __db_add_limbo(dbenv,
- info, argp->fileid, argp->pgno, 1)) != 0)
- goto err;
- }
-
- /* Fix up the metadata page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo update described. */
- LSN(meta) = *lsnp;
- meta->free = argp->next;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- LSN(meta) = argp->meta_lsn;
-
- /*
- * If the page has a zero LSN then its newly created
- * and will go into limbo rather than directly on the
- * free list.
- */
- if (!IS_ZERO_LSN(argp->page_lsn))
- meta->free = argp->pgno;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
- /*
- * This could be the metapage from a subdb which is read from disk
- * to recover its creation.
- */
- if (F_ISSET(file_dbp, DB_AM_SUBDB))
- switch (argp->type) {
- case P_BTREEMETA:
- case P_HASHMETA:
- case P_QAMMETA:
- file_dbp->sync(file_dbp, 0);
- break;
- }
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
- if (0) {
-err:
- if (meta != NULL)
- (void)memp_fput(mpf, meta, 0);
- }
-out: REC_CLOSE;
-}
-
-/*
- * __bam_pg_free_recover --
- * Recovery function for pg_free.
- *
- * PUBLIC: int __bam_pg_free_recover
- * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__bam_pg_free_recover(dbenv, dbtp, lsnp, op, info)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __bam_pg_free_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_LSN copy_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, modified, ret;
-
- COMPQUIET(info, NULL);
- REC_PRINT(__bam_pg_free_print);
- REC_INTRO(__bam_pg_free_read, 1);
-
- /*
- * Fix up the freed page. If we're redoing the operation we get the
- * page and explicitly discard its contents, then update its LSN. If
- * we're undoing the operation, we get the page and restore its header.
- * Create the page if necessary, we may be freeing an aborted
- * create.
- */
- if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
- modified = 0;
- __ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &copy_lsn);
- CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn);
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(copy_lsn) &&
- log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo update described. */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
- pagep->lsn = *lsnp;
-
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- memcpy(pagep, argp->header.data, argp->header.size);
-
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
- /*
- * Fix up the metadata page. If we're redoing or undoing the operation
- * we get the page and update its LSN and free pointer.
- */
- pgno = PGNO_BASE_MD;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist. */
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo the deallocation. */
- meta->free = argp->pgno;
- LSN(meta) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo the deallocation. */
- meta->free = argp->next;
- LSN(meta) = argp->meta_lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
* __bam_split_recover --
* Recovery function for split.
*
@@ -320,7 +50,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
db_pgno_t pgno, root_pgno;
u_int32_t ptype;
- int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret;
+ int cmp, l_update, p_update, r_update, rc, ret, ret_l, rootsplit, t_ret;
COMPQUIET(info, NULL);
REC_PRINT(__bam_split_print);
@@ -345,16 +75,16 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
* so it's got to be aligned. Copying it into allocated memory is
* the only way to guarantee this.
*/
- if ((ret = __os_malloc(dbenv, argp->pg.size, NULL, &sp)) != 0)
+ if ((ret = __os_malloc(dbenv, argp->pg.size, &sp)) != 0)
goto out;
memcpy(sp, argp->pg.data, argp->pg.size);
pgno = PGNO(sp);
root_pgno = argp->root_pgno;
- rootsplit = pgno == root_pgno;
- if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
+ rootsplit = root_pgno != PGNO_INVALID;
+ if ((ret_l = mpf->get(mpf, &argp->left, 0, &lp)) != 0)
lp = NULL;
- if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
+ if (mpf->get(mpf, &argp->right, 0, &rp) != 0)
rp = NULL;
if (DB_REDO(op)) {
@@ -368,8 +98,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
* same reason.
*/
if (rootsplit) {
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
- (void)__db_pgerr(file_dbp, pgno);
+ if ((ret = mpf->get(mpf, &pgno, 0, &pp)) != 0) {
+ __db_pgerr(file_dbp, pgno, ret);
pp = NULL;
goto out;
}
@@ -377,7 +107,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
CHECK_LSN(op, cmp, &LSN(pp), &LSN(argp->pg.data));
p_update = cmp == 0;
} else if (lp == NULL) {
- (void)__db_pgerr(file_dbp, argp->left);
+ __db_pgerr(file_dbp, argp->left, ret_l);
goto out;
}
@@ -400,10 +130,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
goto check_next;
/* Allocate and initialize new left/right child pages. */
- if ((ret =
- __os_malloc(dbenv, file_dbp->pgsize, NULL, &_lp)) != 0
- || (ret =
- __os_malloc(dbenv, file_dbp->pgsize, NULL, &_rp)) != 0)
+ if ((ret = __os_malloc(dbenv, file_dbp->pgsize, &_lp)) != 0 ||
+ (ret = __os_malloc(dbenv, file_dbp->pgsize, &_rp)) != 0)
goto out;
if (rootsplit) {
P_INIT(_lp, file_dbp->pgsize, argp->left,
@@ -431,31 +159,31 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
goto out;
/* If the left child is wrong, update it. */
- if (lp == NULL && (ret =
- memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->left);
+ if (lp == NULL && (ret = mpf->get(
+ mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
+ __db_pgerr(file_dbp, argp->left, ret);
lp = NULL;
goto out;
}
if (l_update) {
memcpy(lp, _lp, file_dbp->pgsize);
lp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, lp, DB_MPOOL_DIRTY)) != 0)
goto out;
lp = NULL;
}
/* If the right child is wrong, update it. */
- if (rp == NULL && (ret = memp_fget(mpf,
- &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->right);
+ if (rp == NULL && (ret = mpf->get(
+ mpf, &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
+ __db_pgerr(file_dbp, argp->right, ret);
rp = NULL;
goto out;
}
if (r_update) {
memcpy(rp, _rp, file_dbp->pgsize);
rp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, rp, DB_MPOOL_DIRTY)) != 0)
goto out;
rp = NULL;
}
@@ -477,11 +205,11 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
P_INIT(pp, file_dbp->pgsize, root_pgno,
PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype);
- RE_NREC_SET(pp,
- rc ? __bam_total(_lp) + __bam_total(_rp) : 0);
+ RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) +
+ __bam_total(file_dbp, _rp) : 0);
pp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, pp, DB_MPOOL_DIRTY)) != 0)
goto out;
pp = NULL;
}
@@ -494,8 +222,8 @@ check_next: /*
* page must exist because we're redoing the operation.
*/
if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
- (void)__db_pgerr(file_dbp, argp->npgno);
+ if ((ret = mpf->get(mpf, &argp->npgno, 0, &np)) != 0) {
+ __db_pgerr(file_dbp, argp->npgno, ret);
np = NULL;
goto out;
}
@@ -505,7 +233,7 @@ check_next: /*
PREV_PGNO(np) = argp->right;
np->lsn = *lsnp;
if ((ret =
- memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
+ mpf->put(mpf, np, DB_MPOOL_DIRTY)) != 0)
goto out;
np = NULL;
}
@@ -518,13 +246,13 @@ check_next: /*
* the adds onto the page that caused the split, and there's
* really no undo-ing to be done.
*/
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
+ if ((ret = mpf->get(mpf, &pgno, 0, &pp)) != 0) {
pp = NULL;
goto lrundo;
}
if (log_compare(lsnp, &LSN(pp)) == 0) {
memcpy(pp, argp->pg.data, argp->pg.size);
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->put(mpf, pp, DB_MPOOL_DIRTY)) != 0)
goto out;
pp = NULL;
}
@@ -542,7 +270,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
log_compare(lsnp, &LSN(lp)) == 0) {
lp->lsn = argp->llsn;
if ((ret =
- memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+ mpf->put(mpf, lp, DB_MPOOL_DIRTY)) != 0)
goto out;
lp = NULL;
}
@@ -550,7 +278,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
log_compare(lsnp, &LSN(rp)) == 0) {
rp->lsn = argp->rlsn;
if ((ret =
- memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+ mpf->put(mpf, rp, DB_MPOOL_DIRTY)) != 0)
goto out;
rp = NULL;
}
@@ -565,14 +293,14 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
* if there's nothing to undo.
*/
if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->npgno, 0, &np)) != 0) {
np = NULL;
goto done;
}
if (log_compare(lsnp, &LSN(np)) == 0) {
PREV_PGNO(np) = argp->left;
np->lsn = argp->nlsn;
- if (memp_fput(mpf, np, DB_MPOOL_DIRTY))
+ if (mpf->put(mpf, np, DB_MPOOL_DIRTY))
goto out;
np = NULL;
}
@@ -583,22 +311,22 @@ done: *lsnp = argp->prev_lsn;
ret = 0;
out: /* Free any pages that weren't dirtied. */
- if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
+ if (pp != NULL && (t_ret = mpf->put(mpf, pp, 0)) != 0 && ret == 0)
ret = t_ret;
- if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
+ if (lp != NULL && (t_ret = mpf->put(mpf, lp, 0)) != 0 && ret == 0)
ret = t_ret;
- if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
+ if (np != NULL && (t_ret = mpf->put(mpf, np, 0)) != 0 && ret == 0)
ret = t_ret;
- if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
+ if (rp != NULL && (t_ret = mpf->put(mpf, rp, 0)) != 0 && ret == 0)
ret = t_ret;
/* Free any allocated space. */
if (_lp != NULL)
- __os_free(_lp, file_dbp->pgsize);
+ __os_free(dbenv, _lp);
if (_rp != NULL)
- __os_free(_rp, file_dbp->pgsize);
+ __os_free(dbenv, _rp);
if (sp != NULL)
- __os_free(sp, argp->pg.size);
+ __os_free(dbenv, sp);
REC_CLOSE;
}
@@ -627,23 +355,24 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
db_pgno_t pgno, root_pgno;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_rsplit_print);
REC_INTRO(__bam_rsplit_read, 1);
/* Fix the root page. */
pgno = root_pgno = argp->root_pgno;
- if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) != 0) {
/* The root page must always exist if we are going forward. */
if (DB_REDO(op)) {
- __db_pgerr(file_dbp, pgno);
+ __db_pgerr(file_dbp, pgno, ret);
goto out;
}
/* This must be the root of an OPD tree. */
DB_ASSERT(root_pgno !=
((BTREE *)file_dbp->bt_internal)->bt_root);
ret = 0;
- goto done;
+ goto do_page;
}
modified = 0;
cmp_n = log_compare(lsnp, &LSN(pagep));
@@ -666,22 +395,23 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
pagep->lsn = argp->rootlsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+do_page:
/*
* Fix the page copied over the root page. It's possible that the
* page never made it to disk, so if we're undo-ing and the page
* doesn't exist, it's okay and there's nothing further to do.
*/
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
modified = 0;
- __ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
+ (void)__ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &copy_lsn);
CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn);
@@ -694,13 +424,16 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -725,15 +458,16 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
PAGE *pagep;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_adj_print);
REC_INTRO(__bam_adj_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -745,7 +479,7 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
/* Need to redo update described. */
if ((ret = __bam_adjindx(dbc,
pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
- goto err;
+ goto out;
LSN(pagep) = *lsnp;
modified = 1;
@@ -753,21 +487,21 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
/* Need to undo update described. */
if ((ret = __bam_adjindx(dbc,
pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
- goto err;
+ goto out;
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -793,15 +527,16 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
PAGE *pagep;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_cadjust_print);
REC_INTRO(__bam_cadjust_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -812,11 +547,13 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
if (IS_BTREE_PAGE(pagep)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+ GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, argp->adjust);
} else {
- GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+ GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, argp->adjust);
}
@@ -826,24 +563,29 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
if (IS_BTREE_PAGE(pagep)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+ GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, -(argp->adjust));
} else {
- GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+ GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, -(argp->adjust));
}
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -869,15 +611,16 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info)
u_int32_t indx;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_cdel_print);
REC_INTRO(__bam_cdel_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -888,27 +631,30 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info)
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
- B_DSET(GET_BKEYDATA(pagep, indx)->type);
+ B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type);
LSN(pagep) = *lsnp;
modified = 1;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
- B_DCLR(GET_BKEYDATA(pagep, indx)->type);
+ B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type);
(void)__bam_ca_delete(file_dbp, argp->pgno, argp->indx, 0);
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -936,18 +682,19 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
int cmp_n, cmp_p, modified, ret;
u_int8_t *p;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_repl_print);
REC_INTRO(__bam_repl_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
- bk = GET_BKEYDATA(pagep, argp->indx);
+ bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
modified = 0;
cmp_n = log_compare(lsnp, &LSN(pagep));
@@ -961,8 +708,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
*/
memset(&dbt, 0, sizeof(dbt));
dbt.size = argp->prefix + argp->suffix + argp->repl.size;
- if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0)
- goto err;
+ if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0)
+ goto out;
p = dbt.data;
memcpy(p, bk->data, argp->prefix);
p += argp->prefix;
@@ -971,9 +718,9 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
+ __os_free(dbenv, dbt.data);
if (ret != 0)
- goto err;
+ goto out;
LSN(pagep) = *lsnp;
modified = 1;
@@ -985,8 +732,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
*/
memset(&dbt, 0, sizeof(dbt));
dbt.size = argp->prefix + argp->suffix + argp->orig.size;
- if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0)
- goto err;
+ if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0)
+ goto out;
p = dbt.data;
memcpy(p, bk->data, argp->prefix);
p += argp->prefix;
@@ -995,27 +742,27 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
+ __os_free(dbenv, dbt.data);
if (ret != 0)
- goto err;
+ goto out;
/* Reset the deleted flag, if necessary. */
if (argp->isdeleted)
- B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
+ B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type);
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)mpf->put(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -1040,14 +787,15 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info)
DB_MPOOLFILE *mpf;
int cmp_n, cmp_p, modified, ret;
+ meta = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_root_print);
REC_INTRO(__bam_root_read, 0);
- if ((ret = memp_fget(mpf, &argp->meta_pgno, 0, &meta)) != 0) {
+ if ((ret = mpf->get(mpf, &argp->meta_pgno, 0, &meta)) != 0) {
/* The metadata page must always exist on redo. */
if (DB_REDO(op)) {
- (void)__db_pgerr(file_dbp, argp->meta_pgno);
+ __db_pgerr(file_dbp, argp->meta_pgno, ret);
goto out;
} else
goto done;
@@ -1068,13 +816,16 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info)
meta->dbmeta.lsn = argp->meta_lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = mpf->put(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ meta = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (meta != NULL)
+ (void)mpf->put(mpf, meta, 0);
+ REC_CLOSE;
}
/*
@@ -1116,7 +867,7 @@ __bam_curadj_recover(dbenv, dbtp, lsnp, op, info)
break;
case DB_CA_DUP:
if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx,
- argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
+ argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
goto out;
break;
@@ -1181,7 +932,8 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info)
* this function know anything about how offpage dups work.
*/
if ((ret =
- __db_icursor(file_dbp, NULL, DB_RECNO, argp->root, 0, &rdbc)) != 0)
+ __db_icursor(file_dbp,
+ NULL, DB_RECNO, argp->root, 0, DB_LOCK_INVALIDID, &rdbc)) != 0)
goto out;
cp = (BTREE_CURSOR *)rdbc->internal;
diff --git a/bdb/btree/bt_reclaim.c b/bdb/btree/bt_reclaim.c
index 538d837c2d2..ae4554ea7d6 100644
--- a/bdb/btree/bt_reclaim.c
+++ b/bdb/btree/bt_reclaim.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1998, 1999, 2000
+ * Copyright (c) 1998-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell Exp $";
+static const char revid[] = "$Id: bt_reclaim.c,v 11.11 2002/03/29 20:46:26 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,10 +18,8 @@ static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
/*
* __bam_reclaim --
@@ -51,3 +49,38 @@ __bam_reclaim(dbp, txn)
return (ret);
}
+
+/*
+ * __bam_truncate --
+ * Truncate a database: traverse the tree, then report trunc.count via countp.
+ *
+ * PUBLIC: int __bam_truncate __P((DB *, DB_TXN *, u_int32_t *));
+ */
+int
+__bam_truncate(dbp, txn, countp)
+ DB *dbp;
+ DB_TXN *txn;
+ u_int32_t *countp;
+{
+ DBC *dbc;
+ db_trunc_param trunc;
+ int ret, t_ret;
+
+ /* Acquire a cursor in txn; on failure return before *countp is written. */
+ if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+ return (ret);
+
+ trunc.count = 0;
+ trunc.dbc = dbc;
+ /* Walk the tree from the cursor's root with DB_LOCK_WRITE, freeing pages. */
+ ret = __bam_traverse(dbc,
+ DB_LOCK_WRITE, dbc->internal->root, __db_truncate_callback, &trunc);
+
+ /* Discard the cursor; a close error is reported only if the walk succeeded. */
+ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ /* Stored even when ret != 0; NOTE(review): confirm what the callback counts. */
+ *countp = trunc.count;
+
+ return (ret);
+}
diff --git a/bdb/btree/bt_recno.c b/bdb/btree/bt_recno.c
index 6ac0cac350d..fab684f3a5f 100644
--- a/bdb/btree/bt_recno.c
+++ b/bdb/btree/bt_recno.c
@@ -1,36 +1,31 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1997, 1998, 1999, 2000
+ * Copyright (c) 1997-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_recno.c,v 11.65 2001/01/18 14:33:22 bostic Exp $";
+static const char revid[] = "$Id: bt_recno.c,v 11.106 2002/08/16 04:56:30 ubell Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <limits.h>
+#include <stdio.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "db_ext.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "lock_ext.h"
-#include "qam.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
-static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
-static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
static int __ram_source __P((DB *));
static int __ram_sread __P((DBC *, db_recno_t));
static int __ram_update __P((DBC *, db_recno_t, int));
@@ -90,17 +85,32 @@ static int __ram_update __P((DBC *, db_recno_t, int));
* Do we need to log the current cursor adjustment?
*/
#define CURADJ_LOG(dbc) \
- (DB_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)
+ (DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)
+
+/*
+ * After a search, copy page, pgno, indx, lock and lock_mode from cp->csp into
+ * cp, discarding cp's old lock via __TLPUT; needs `dbc' in the caller's scope.
+ */
+#define STACK_TO_CURSOR(cp) { \
+ (cp)->page = (cp)->csp->page; \
+ (cp)->pgno = (cp)->csp->page->pgno; \
+ (cp)->indx = (cp)->csp->indx; \
+ (void)__TLPUT(dbc, (cp)->lock); \
+ (cp)->lock = (cp)->csp->lock; \
+ (cp)->lock_mode = (cp)->csp->lock_mode; \
+}
/*
* __ram_open --
* Recno open function.
*
- * PUBLIC: int __ram_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __ram_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
*/
int
-__ram_open(dbp, name, base_pgno, flags)
+__ram_open(dbp, txn, name, base_pgno, flags)
DB *dbp;
+ DB_TXN *txn;
const char *name;
db_pgno_t base_pgno;
u_int32_t flags;
@@ -109,15 +119,14 @@ __ram_open(dbp, name, base_pgno, flags)
DBC *dbc;
int ret, t_ret;
+ COMPQUIET(name, NULL);
t = dbp->bt_internal;
/* Initialize the remaining fields/methods of the DB. */
- dbp->del = __ram_delete;
- dbp->put = __ram_put;
dbp->stat = __bam_stat;
/* Start up the tree. */
- if ((ret = __bam_read_root(dbp, name, base_pgno, flags)) != 0)
+ if ((ret = __bam_read_root(dbp, txn, base_pgno, flags)) != 0)
return (ret);
/*
@@ -132,7 +141,7 @@ __ram_open(dbp, name, base_pgno, flags)
return (ret);
/* If we're snapshotting an underlying source file, do it now. */
- if (F_ISSET(dbp, DB_RE_SNAPSHOT)) {
+ if (F_ISSET(dbp, DB_AM_SNAPSHOT)) {
/* Allocate a cursor. */
if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
return (ret);
@@ -147,104 +156,38 @@ __ram_open(dbp, name, base_pgno, flags)
ret = t_ret;
}
- return (0);
-}
-
-/*
- * __ram_delete --
- * Recno db->del function.
- */
-static int
-__ram_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- BTREE_CURSOR *cp;
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
-
- /* Check for invalid flags. */
- if ((ret = __db_delchk(dbp,
- key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- /* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags);
-
- /* Check the user's record number and fill in as necessary. */
- if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
- goto err;
-
- /* Do the delete. */
- cp = (BTREE_CURSOR *)dbc->internal;
- cp->recno = recno;
-
- ret = __ram_c_del(dbc);
-
- /* Release the cursor. */
-err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
return (ret);
}
/*
- * __ram_put --
- * Recno db->put function.
+ * __ram_append --
+ * Recno append function: add data with DB_APPEND, copy the recno into key.
+ *
+ * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *));
*/
-static int
-__ram_put(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
+int
+__ram_append(dbc, key, data)
+ DBC *dbc;
DBT *key, *data;
- u_int32_t flags;
{
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
-
- /* Check for invalid flags. */
- if ((ret = __db_putchk(dbp,
- key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
+ BTREE_CURSOR *cp;
+ int ret;
- DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags);
+ cp = (BTREE_CURSOR *)dbc->internal;
/*
- * If we're appending to the tree, make sure we've read in all of
- * the backing source file. Otherwise, check the user's record
- * number and fill in as necessary. If we found the record or it
- * simply didn't exist, add the user's record.
+ * Make sure we've read in all of the backing source file. If
+ * we found the record or it simply didn't exist, add the
+ * user's record.
*/
- if (flags == DB_APPEND)
- ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
- else
- ret = __ram_getno(dbc, key, &recno, 1);
+ ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
if (ret == 0 || ret == DB_NOTFOUND)
- ret = __ram_add(dbc, &recno, data, flags, 0);
-
- /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
+ ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0);
- /* Return the record number if we're appending to the tree. */
- if (ret == 0 && flags == DB_APPEND)
- ret = __db_retcopy(dbp, key, &recno, sizeof(recno),
- &dbc->rkey.data, &dbc->rkey.ulen);
+ /* Return the record number through key, using the cursor's rkey buffer. */
+ if (ret == 0)
+ ret = __db_retcopy(dbc->dbp->dbenv, key, &cp->recno,
+ sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
return (ret);
}
@@ -295,9 +238,9 @@ __ram_c_del(dbc)
goto err;
}
stack = 1;
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* If re-numbering records, the on-page deleted flag can only mean
@@ -310,7 +253,7 @@ __ram_c_del(dbc)
* delete records they never created, the latter is an error because
* if the record was "deleted", we could never have found it.
*/
- if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type)) {
+ if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) {
ret = DB_KEYEMPTY;
goto err;
}
@@ -321,9 +264,8 @@ __ram_c_del(dbc)
goto err;
__bam_adjust(dbc, -1);
if (__ram_ca(dbc, CA_DELETE) > 0 &&
- CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_DELETE,
- cp->root, cp->recno, cp->order)) != 0)
+ CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, dbc->txn,
+ &lsn, 0, CA_DELETE, cp->root, cp->recno, cp->order)) != 0)
goto err;
/*
@@ -346,15 +288,15 @@ __ram_c_del(dbc)
* going to be emptied by removing the single reference
* to the emptied page (or one of its parents).
*/
- for (epg = cp->sp; epg <= cp->csp; ++epg)
- if (NUM_ENT(epg->page) <= 1)
+ for (epg = cp->csp; epg >= cp->sp; --epg)
+ if (NUM_ENT(epg->page) > 1)
break;
/*
* We want to delete a single item out of the last page
- * that we're not deleting, back up to that page.
+ * that we're not deleting.
*/
- ret = __bam_dpages(dbc, --epg);
+ ret = __bam_dpages(dbc, epg);
/*
* Regardless of the return from __bam_dpages, it will
@@ -412,6 +354,7 @@ __ram_c_get(dbc, key, data, flags, pgnop)
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
+ LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY);
retry: switch (flags) {
case DB_CURRENT:
/*
@@ -504,6 +447,7 @@ retry: switch (flags) {
goto err;
/* NOTREACHED */
case DB_GET_BOTH:
+ case DB_GET_BOTH_RANGE:
/*
* If we're searching a set of off-page dups, we start
* a new linear search from the first record. Otherwise,
@@ -531,6 +475,8 @@ retry: switch (flags) {
* read from the backing source file. Do it now for DB_CURRENT (if
* the current record was deleted we may need more records from the
* backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT.
+ * (We don't have to test for flags == DB_FIRST, because the switch
+ * statement above re-set flags to DB_NEXT in that case.)
*/
if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret =
__ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND)
@@ -547,16 +493,8 @@ retry: switch (flags) {
goto err;
}
- /*
- * Copy the page into the cursor, discarding any lock we
- * are currently holding.
- */
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
- (void)__TLPUT(dbc, cp->lock);
- cp->lock = cp->csp->lock;
- cp->lock_mode = cp->csp->lock_mode;
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* If re-numbering records, the on-page deleted flag means this
@@ -567,21 +505,34 @@ retry: switch (flags) {
* walking through off-page duplicates, and fail if they were
* requested explicitly by the application.
*/
- if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type))
+ if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type))
switch (flags) {
case DB_NEXT:
case DB_PREV:
(void)__bam_stkrel(dbc, STK_CLRDBC);
goto retry;
case DB_GET_BOTH:
- (void)__bam_stkrel(dbc, STK_CLRDBC);
- continue;
+ case DB_GET_BOTH_RANGE:
+ /*
+ * If we're an OPD tree, we don't care about
+ * matching a record number on a DB_GET_BOTH
+ * -- everything belongs to the same tree. A
+ * normal recno should give up and return
+ * DB_NOTFOUND if the matching recno is deleted.
+ */
+ if (F_ISSET(dbc, DBC_OPD)) {
+ (void)__bam_stkrel(dbc, STK_CLRDBC);
+ continue;
+ }
+ ret = DB_NOTFOUND;
+ goto err;
default:
ret = DB_KEYEMPTY;
goto err;
}
- if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) {
+ if (flags == DB_GET_BOTH ||
+ flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
if ((ret = __bam_cmp(dbp, data,
cp->page, cp->indx, __bam_defcmp, &cmp)) != 0)
return (ret);
@@ -598,10 +549,11 @@ retry: switch (flags) {
/* Return the key if the user didn't give us one. */
if (!F_ISSET(dbc, DBC_OPD)) {
- if (flags != DB_SET && flags != DB_SET_RANGE)
- ret = __db_retcopy(dbp,
- key, &cp->recno, sizeof(cp->recno),
- &dbc->rkey.data, &dbc->rkey.ulen);
+ if (flags != DB_GET_BOTH && flags != DB_GET_BOTH_RANGE &&
+ flags != DB_SET && flags != DB_SET_RANGE)
+ ret = __db_retcopy(dbp->dbenv,
+ key, &cp->recno, sizeof(cp->recno),
+ &dbc->rkey->data, &dbc->rkey->ulen);
F_SET(key, DB_DBT_ISSET);
}
@@ -637,23 +589,43 @@ __ram_c_put(dbc, key, data, flags, pgnop)
cp = (BTREE_CURSOR *)dbc->internal;
/*
- * DB_KEYFIRST and DB_KEYLAST will only be set if we're dealing with
- * an off-page duplicate tree, they can't be specified at user level.
- * Translate them into something else.
+ * DB_KEYFIRST and DB_KEYLAST mean different things if they're
+ * used in an off-page duplicate tree. If we're an off-page
+ * duplicate tree, they really mean "put at the beginning of the
+ * tree" and "put at the end of the tree" respectively, so translate
+ * them to something else.
*/
- switch (flags) {
- case DB_KEYFIRST:
- cp->recno = 1;
- flags = DB_BEFORE;
- break;
- case DB_KEYLAST:
- if ((ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0)) != 0)
- return (ret);
- if (CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_ICURRENT,
- cp->root, cp->recno, cp->order)))
- return (ret);
- return (0);
+ if (F_ISSET(dbc, DBC_OPD))
+ switch (flags) {
+ case DB_KEYFIRST:
+ cp->recno = 1;
+ flags = DB_BEFORE;
+ break;
+ case DB_KEYLAST:
+ if ((ret = __ram_add(dbc,
+ &cp->recno, data, DB_APPEND, 0)) != 0)
+ return (ret);
+ if (CURADJ_LOG(dbc) &&
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
+ CA_ICURRENT, cp->root, cp->recno, cp->order)))
+ return (ret);
+ return (0);
+ }
+
+ /*
+ * Handle normal DB_KEYFIRST/DB_KEYLAST; for a recno, which has
+ * no duplicates, these are identical and mean "put the given
+ * datum at the given recno".
+ *
+ * Note that the code here used to be in __ram_put; now, we
+ * go through the access-method-common __db_put function, which
+ * handles DB_NOOVERWRITE, so we and __ram_add don't have to.
+ */
+ if (flags == DB_KEYFIRST || flags == DB_KEYLAST) {
+ ret = __ram_getno(dbc, key, &cp->recno, 1);
+ if (ret == 0 || ret == DB_NOTFOUND)
+ ret = __ram_add(dbc, &cp->recno, data, 0, 0);
+ return (ret);
}
/*
@@ -677,9 +649,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
*/
DB_ASSERT(exact || CD_ISSET(cp));
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
ret = __bam_iitem(dbc, key, data, iiflags, 0);
t_ret = __bam_stkrel(dbc, STK_CLRDBC);
@@ -688,7 +659,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
ret = t_ret;
else if (ret == DB_NEEDSPLIT) {
arg = &cp->recno;
- if ((ret = __bam_split(dbc, arg)) != 0)
+ if ((ret = __bam_split(dbc, arg, NULL)) != 0)
goto err;
goto split;
}
@@ -709,8 +680,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
/* Only log if __ram_ca found any relevant cursors. */
if (nc > 0 && CURADJ_LOG(dbc) &&
- (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_IAFTER,
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER,
cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
@@ -720,8 +690,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
/* Only log if __ram_ca found any relevant cursors. */
if (nc > 0 && CURADJ_LOG(dbc) &&
- (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_IBEFORE,
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE,
cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
@@ -734,8 +703,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
* Only log if __ram_ca found any relevant cursors.
*/
if (CD_ISSET(cp) && __ram_ca(dbc, CA_ICURRENT) > 0 &&
- CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(
- dbp->dbenv, dbc->txn, &lsn, 0, dbp->log_fileid,
+ CURADJ_LOG(dbc) &&
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
@@ -743,8 +712,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
/* Return the key if we've created a new record. */
if (!F_ISSET(dbc, DBC_OPD) && (flags == DB_AFTER || flags == DB_BEFORE))
- ret = __db_retcopy(dbp, key, &cp->recno,
- sizeof(cp->recno), &dbc->rkey.data, &dbc->rkey.ulen);
+ ret = __db_retcopy(dbp->dbenv, key, &cp->recno,
+ sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
/* The cursor was reset, no further delete adjustment is necessary. */
err: CD_CLR(cp);
@@ -940,13 +909,12 @@ __ram_update(dbc, recno, can_create)
int can_create;
{
BTREE *t;
- BTREE_CURSOR *cp;
DB *dbp;
+ DBT *rdata;
db_recno_t nrecs;
int ret;
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
/*
@@ -976,27 +944,13 @@ __ram_update(dbc, recno, can_create)
if (!can_create || recno <= nrecs + 1)
return (0);
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if (dbc->rdata.ulen < t->re_len) {
- if ((ret = __os_realloc(dbp->dbenv,
- t->re_len, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
- return (ret);
- }
- dbc->rdata.ulen = t->re_len;
- }
- dbc->rdata.size = t->re_len;
- memset(dbc->rdata.data, t->re_pad, t->re_len);
- } else
- dbc->rdata.size = 0;
+ rdata = &dbc->my_rdata;
+ rdata->flags = 0;
+ rdata->size = 0;
while (recno > ++nrecs)
if ((ret = __ram_add(dbc,
- &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0)
+ &nrecs, rdata, 0, BI_DELETED)) != 0)
return (ret);
return (0);
}
@@ -1017,9 +971,9 @@ __ram_source(dbp)
/* Find the real name, and swap out the one we had before. */
if ((ret = __db_appname(dbp->dbenv,
- DB_APP_DATA, NULL, t->re_source, 0, NULL, &source)) != 0)
+ DB_APP_DATA, t->re_source, 0, NULL, &source)) != 0)
return (ret);
- __os_freestr(t->re_source);
+ __os_free(dbp->dbenv, t->re_source);
t->re_source = source;
/*
@@ -1060,6 +1014,7 @@ __ram_writeback(dbp)
t = dbp->bt_internal;
dbenv = dbp->dbenv;
fp = NULL;
+ pad = NULL;
/* If the file wasn't modified, we're done. */
if (!t->re_modified)
@@ -1119,40 +1074,45 @@ __ram_writeback(dbp)
/*
* We step through the records, writing each one out. Use the record
* number and the dbp->get() function, instead of a cursor, so we find
- * and write out "deleted" or non-existent records.
+ * and write out "deleted" or non-existent records. The DB handle may
+ * be threaded, so allocate memory as we go.
*/
memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
key.size = sizeof(db_recno_t);
key.data = &keyno;
+ memset(&data, 0, sizeof(data));
+ F_SET(&data, DB_DBT_REALLOC);
/*
* We'll need the delimiter if we're doing variable-length records,
* and the pad character if we're doing fixed-length records.
*/
delim = t->re_delim;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if ((ret = __os_malloc(dbenv, t->re_len, NULL, &pad)) != 0)
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
+ if ((ret = __os_malloc(dbenv, t->re_len, &pad)) != 0)
goto err;
memset(pad, t->re_pad, t->re_len);
- } else
- COMPQUIET(pad, NULL);
+ }
for (keyno = 1;; ++keyno) {
switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
case 0:
- if (fwrite(data.data, 1, data.size, fp) != data.size)
+ if (data.size != 0 && (u_int32_t)fwrite(
+ data.data, 1, data.size, fp) != data.size)
goto write_err;
break;
case DB_KEYEMPTY:
- if (F_ISSET(dbp, DB_RE_FIXEDLEN) &&
- fwrite(pad, 1, t->re_len, fp) != t->re_len)
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN) &&
+ (u_int32_t)fwrite(pad, 1, t->re_len, fp) !=
+ t->re_len)
goto write_err;
break;
case DB_NOTFOUND:
ret = 0;
goto done;
+ default:
+ goto err;
}
- if (!F_ISSET(dbp, DB_RE_FIXEDLEN) &&
+ if (!F_ISSET(dbp, DB_AM_FIXEDLEN) &&
fwrite(&delim, 1, 1, fp) != 1) {
write_err: ret = errno;
__db_err(dbp->dbenv,
@@ -1174,6 +1134,12 @@ done: /* Close the file descriptor. */
if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
+ /* Discard memory allocated to hold the data items. */
+ if (data.data != NULL)
+ __os_ufree(dbenv, data.data);
+ if (pad != NULL)
+ __os_free(dbenv, pad);
+
if (ret == 0)
t->re_modified = 0;
@@ -1191,7 +1157,7 @@ __ram_sread(dbc, top)
{
BTREE *t;
DB *dbp;
- DBT data;
+ DBT data, *rdata;
db_recno_t recno;
size_t len;
int ch, ret, was_modified;
@@ -1203,45 +1169,56 @@ __ram_sread(dbc, top)
if ((ret = __bam_nrecs(dbc, &recno)) != 0)
return (ret);
- /* Use the record data return memory, it's only a short-term use. */
- len = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : 256;
- if (dbc->rdata.ulen < len) {
+ /*
+ * Use the record key return memory, it's only a short-term use.
+ * The record data return memory is used by __bam_iitem, which
+ * we'll indirectly call, so use the key so as not to collide.
+ */
+ len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256;
+ rdata = &dbc->my_rkey;
+ if (rdata->ulen < len) {
if ((ret = __os_realloc(
- dbp->dbenv, len, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ dbp->dbenv, len, &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
}
- dbc->rdata.ulen = len;
+ rdata->ulen = (u_int32_t)len;
}
memset(&data, 0, sizeof(data));
while (recno < top) {
- data.data = dbc->rdata.data;
+ data.data = rdata->data;
data.size = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN))
for (len = t->re_len; len > 0; --len) {
- if ((ch = getc(t->re_fp)) == EOF)
- goto eof;
+ if ((ch = getc(t->re_fp)) == EOF) {
+ if (data.size == 0)
+ goto eof;
+ break;
+ }
((u_int8_t *)data.data)[data.size++] = ch;
}
else
for (;;) {
- if ((ch = getc(t->re_fp)) == EOF)
- goto eof;
+ if ((ch = getc(t->re_fp)) == EOF) {
+ if (data.size == 0)
+ goto eof;
+ break;
+ }
if (ch == t->re_delim)
break;
((u_int8_t *)data.data)[data.size++] = ch;
- if (data.size == dbc->rdata.ulen) {
+ if (data.size == rdata->ulen) {
if ((ret = __os_realloc(dbp->dbenv,
- dbc->rdata.ulen *= 2,
- NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ rdata->ulen *= 2,
+ &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
} else
- data.data = dbc->rdata.data;
+ data.data = rdata->data;
}
}
@@ -1281,7 +1258,6 @@ __ram_add(dbc, recnop, data, flags, bi_flags)
DBT *data;
u_int32_t flags, bi_flags;
{
- BKEYDATA *bk;
BTREE_CURSOR *cp;
int exact, ret, stack;
@@ -1292,9 +1268,9 @@ retry: /* Find the slot for insertion. */
S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0)
return (ret);
stack = 1;
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* The application may modify the data based on the selected record
@@ -1305,24 +1281,6 @@ retry: /* Find the slot for insertion. */
goto err;
/*
- * If re-numbering records, the on-page deleted flag means this record
- * was implicitly created. If not re-numbering records, the on-page
- * deleted flag means this record was implicitly created, or, it was
- * deleted at some time.
- *
- * If DB_NOOVERWRITE is set and the item already exists in the tree,
- * return an error unless the item was either marked for deletion or
- * only implicitly created.
- */
- if (exact) {
- bk = GET_BKEYDATA(cp->page, cp->indx);
- if (!B_DISSET(bk->type) && flags == DB_NOOVERWRITE) {
- ret = DB_KEYEXIST;
- goto err;
- }
- }
-
- /*
* Select the arguments for __bam_iitem() and do the insert. If the
* key is an exact match, or we're replacing the data item with a
* new data item, replace the current item. If the key isn't an exact
@@ -1353,7 +1311,7 @@ retry: /* Find the slot for insertion. */
(void)__bam_stkrel(dbc, STK_CLRDBC);
stack = 0;
- if ((ret = __bam_split(dbc, recnop)) != 0)
+ if ((ret = __bam_split(dbc, recnop, NULL)) != 0)
goto err;
goto retry;
diff --git a/bdb/btree/bt_rsearch.c b/bdb/btree/bt_rsearch.c
index 7102cd715aa..a75181b44e2 100644
--- a/bdb/btree/bt_rsearch.c
+++ b/bdb/btree/bt_rsearch.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -40,7 +40,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell Exp $";
+static const char revid[] = "$Id: bt_rsearch.c,v 11.34 2002/07/03 19:03:50 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -48,10 +48,10 @@ static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "db_shash.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
/*
* __bam_rsearch --
@@ -70,6 +70,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
BTREE_CURSOR *cp;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_indx_t adjust, deloffset, indx, top;
@@ -79,6 +80,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
int ret, stack;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
BT_STK_CLR(cp);
@@ -99,11 +101,11 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
* Retrieve the root page.
*/
pg = cp->root;
- stack = LF_ISSET(S_STACK);
+ stack = LF_ISSET(S_STACK) ? 1 : 0;
lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -120,12 +122,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__LPUT(dbc, lock);
lock_mode = DB_LOCK_WRITE;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -164,7 +166,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
* eliminate any concurrency. A possible fix
* would be to lock the last leaf page instead.
*/
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__TLPUT(dbc, lock);
return (DB_NOTFOUND);
}
@@ -202,8 +204,8 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
goto err;
}
}
- if (!B_DISSET(
- GET_BKEYDATA(h, indx + deloffset)->type) &&
+ if (!B_DISSET(GET_BKEYDATA(dbp, h,
+ indx + deloffset)->type) &&
++t_recno == recno)
break;
}
@@ -216,7 +218,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
return (0);
case P_IBTREE:
for (indx = 0, top = NUM_ENT(h);;) {
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (++indx == top || total + bi->nrecs >= recno)
break;
total += bi->nrecs;
@@ -235,7 +237,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
return (0);
case P_IRECNO:
for (indx = 0, top = NUM_ENT(h);;) {
- ri = GET_RINTERNAL(h, indx);
+ ri = GET_RINTERNAL(dbp, h, indx);
if (++indx == top || total + ri->nrecs >= recno)
break;
total += ri->nrecs;
@@ -243,7 +245,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
pg = ri->pgno;
break;
default:
- return (__db_pgfmt(dbp, h->pgno));
+ return (__db_pgfmt(dbp->dbenv, h->pgno));
}
--indx;
@@ -276,12 +278,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
(h->level - 1) == LEAFLEVEL)
stack = 1;
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
lock_mode = stack &&
LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* If we fail, discard the lock we held. This
* is OK because this only happens when we are
@@ -292,7 +294,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
}
}
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0)
goto err;
}
/* NOTREACHED */
@@ -315,12 +317,14 @@ __bam_adjust(dbc, adjust)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
EPG *epg;
PAGE *h;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
root_pgno = cp->root;
@@ -328,22 +332,27 @@ __bam_adjust(dbc, adjust)
for (epg = cp->sp; epg <= cp->csp; ++epg) {
h = epg->page;
if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cadjust_log(dbp->dbenv,
- dbc->txn, &LSN(h), 0, dbp->log_fileid,
- PGNO(h), &LSN(h), (u_int32_t)epg->indx, adjust,
- PGNO(h) == root_pgno ? CAD_UPDATEROOT : 0)) != 0)
- return (ret);
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cadjust_log(dbp, dbc->txn,
+ &LSN(h), 0, PGNO(h), &LSN(h),
+ (u_int32_t)epg->indx, adjust,
+ PGNO(h) == root_pgno ?
+ CAD_UPDATEROOT : 0)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(h));
if (TYPE(h) == P_IBTREE)
- GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
+ GET_BINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
else
- GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
+ GET_RINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
if (PGNO(h) == root_pgno)
RE_NREC_ADJ(h, adjust);
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
}
}
@@ -363,21 +372,23 @@ __bam_nrecs(dbc, rep)
{
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
pgno = dbc->internal->root;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
return (ret);
*rep = RE_NREC(h);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__TLPUT(dbc, lock);
return (0);
@@ -387,10 +398,11 @@ __bam_nrecs(dbc, rep)
* __bam_total --
* Return the number of records below a page.
*
- * PUBLIC: db_recno_t __bam_total __P((PAGE *));
+ * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *));
*/
db_recno_t
-__bam_total(h)
+__bam_total(dbp, h)
+ DB *dbp;
PAGE *h;
{
db_recno_t nrecs;
@@ -403,25 +415,26 @@ __bam_total(h)
case P_LBTREE:
/* Check for logically deleted records. */
for (indx = 0; indx < top; indx += P_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
+ if (!B_DISSET(
+ GET_BKEYDATA(dbp, h, indx + O_INDX)->type))
++nrecs;
break;
case P_LDUP:
/* Check for logically deleted records. */
for (indx = 0; indx < top; indx += O_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx)->type))
+ if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
++nrecs;
break;
case P_IBTREE:
for (indx = 0; indx < top; indx += O_INDX)
- nrecs += GET_BINTERNAL(h, indx)->nrecs;
+ nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs;
break;
case P_LRECNO:
nrecs = NUM_ENT(h);
break;
case P_IRECNO:
for (indx = 0; indx < top; indx += O_INDX)
- nrecs += GET_RINTERNAL(h, indx)->nrecs;
+ nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs;
break;
}
diff --git a/bdb/btree/bt_search.c b/bdb/btree/bt_search.c
index d822198f243..92b2106311d 100644
--- a/bdb/btree/bt_search.c
+++ b/bdb/btree/bt_search.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic Exp $";
+static const char revid[] = "$Id: bt_search.c,v 11.43 2002/07/03 19:03:50 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,21 +53,22 @@ static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
/*
* __bam_search --
* Search a btree for a key.
*
- * PUBLIC: int __bam_search __P((DBC *,
+ * PUBLIC: int __bam_search __P((DBC *, db_pgno_t,
* PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *));
*/
int
-__bam_search(dbc, key, flags, stop, recnop, exactp)
+__bam_search(dbc, root_pgno, key, flags, stop, recnop, exactp)
DBC *dbc;
+ db_pgno_t root_pgno;
const DBT *key;
u_int32_t flags;
int stop, *exactp;
@@ -77,8 +78,9 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
BTREE_CURSOR *cp;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
- db_indx_t base, i, indx, lim;
+ db_indx_t base, i, indx, *inp, lim;
db_lockmode_t lock_mode;
db_pgno_t pg;
db_recno_t recno;
@@ -86,6 +88,7 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
int (*func) __P((DB *, const DBT *, const DBT *));
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
recno = 0;
@@ -109,12 +112,12 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
* Retrieve the root page.
*/
try_again:
- pg = cp->root;
+ pg = root_pgno == PGNO_INVALID ? cp->root : root_pgno;
stack = LF_ISSET(S_STACK) && F_ISSET(cp, C_RECNUM);
lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -131,21 +134,21 @@ try_again:
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__LPUT(dbc, lock);
lock_mode = DB_LOCK_WRITE;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
}
- if (!((LF_ISSET(S_PARENT)
- && (u_int8_t)(stop + 1) >= h->level) ||
+ if (!((LF_ISSET(S_PARENT) &&
+ (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
/* Someone else split the root, start over. */
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__LPUT(dbc, lock);
goto try_again;
}
@@ -158,6 +161,7 @@ try_again:
t->bt_compare;
for (;;) {
+ inp = P_INP(dbp, h);
/*
* Do a binary search on the current page. If we're searching
* a Btree leaf page, we have to walk the indices in groups of
@@ -199,7 +203,7 @@ try_again:
if (LF_ISSET(S_STK_ONLY)) {
BT_STK_NUM(dbp->dbenv, cp, h, base, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
return (ret);
}
@@ -232,21 +236,21 @@ try_again:
*/
next: if (recnop != NULL)
for (i = 0; i < indx; ++i)
- recno += GET_BINTERNAL(h, i)->nrecs;
+ recno += GET_BINTERNAL(dbp, h, i)->nrecs;
- pg = GET_BINTERNAL(h, indx)->pgno;
+ pg = GET_BINTERNAL(dbp, h, indx)->pgno;
if (LF_ISSET(S_STK_ONLY)) {
if (stop == h->level) {
BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
return (ret);
}
BT_STK_NUMPUSH(dbp->dbenv, cp, h, indx, ret);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* Discard our lock and return on failure. This
* is OK because it only happens when descending
@@ -284,12 +288,12 @@ next: if (recnop != NULL)
(h->level - 1) == LEAFLEVEL)
stack = 1;
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
lock_mode = stack &&
LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* If we fail, discard the lock we held. This
* is OK because this only happens when we are
@@ -299,7 +303,7 @@ next: if (recnop != NULL)
goto err;
}
}
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0)
goto err;
}
/* NOTREACHED */
@@ -327,11 +331,11 @@ found: *exactp = 1;
if (TYPE(h) == P_LBTREE) {
if (LF_ISSET(S_DUPLAST))
while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX])
+ inp[indx] == inp[indx + P_INDX])
indx += P_INDX;
else
while (indx > 0 &&
- h->inp[indx] == h->inp[indx - P_INDX])
+ inp[indx] == inp[indx - P_INDX])
indx -= P_INDX;
}
@@ -344,29 +348,29 @@ found: *exactp = 1;
if (LF_ISSET(S_DELNO)) {
deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0;
if (LF_ISSET(S_DUPLAST))
- while (B_DISSET(GET_BKEYDATA(
+ while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) && indx > 0 &&
- h->inp[indx] == h->inp[indx - adjust])
+ inp[indx] == inp[indx - adjust])
indx -= adjust;
else
- while (B_DISSET(GET_BKEYDATA(
+ while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) &&
indx < (db_indx_t)(NUM_ENT(h) - adjust) &&
- h->inp[indx] == h->inp[indx + adjust])
+ inp[indx] == inp[indx + adjust])
indx += adjust;
/*
* If we weren't able to find a non-deleted duplicate, return
* DB_NOTFOUND.
*/
- if (B_DISSET(GET_BKEYDATA(h, indx + deloffset)->type))
+ if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type))
goto notfound;
}
if (LF_ISSET(S_STK_ONLY)) {
BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
} else {
BT_STK_ENTER(dbp->dbenv, cp, h, indx, lock, lock_mode, ret);
if (ret != 0)
@@ -376,7 +380,7 @@ found: *exactp = 1;
notfound:
/* Keep the page locked for serializability. */
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
(void)__TLPUT(dbc, lock);
ret = DB_NOTFOUND;
@@ -398,10 +402,12 @@ __bam_stkrel(dbc, flags)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
EPG *epg;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -414,10 +420,10 @@ __bam_stkrel(dbc, flags)
if (epg->page != NULL) {
if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) {
cp->page = NULL;
- cp->lock.off = LOCK_INVALID;
+ LOCK_INIT(cp->lock);
}
- if ((t_ret = memp_fput(
- dbp->mpf, epg->page, 0)) != 0 && ret == 0)
+ if ((t_ret =
+ mpf->put(mpf, epg->page, 0)) != 0 && ret == 0)
ret = t_ret;
/*
* XXX
@@ -428,12 +434,10 @@ __bam_stkrel(dbc, flags)
*/
epg->page = NULL;
}
- if (epg->lock.off != LOCK_INVALID) {
- if (LF_ISSET(STK_NOLOCK))
- (void)__LPUT(dbc, epg->lock);
- else
- (void)__TLPUT(dbc, epg->lock);
- }
+ if (LF_ISSET(STK_NOLOCK))
+ (void)__LPUT(dbc, epg->lock);
+ else
+ (void)__TLPUT(dbc, epg->lock);
}
/* Clear the stack, all pages have been released. */
@@ -463,7 +467,7 @@ __bam_stkgrow(dbenv, cp)
return (ret);
memcpy(p, cp->sp, entries * sizeof(EPG));
if (cp->sp != cp->stack)
- __os_free(cp->sp, entries * sizeof(EPG));
+ __os_free(dbenv, cp->sp);
cp->sp = p;
cp->csp = p + entries;
cp->esp = p + entries * 2;
diff --git a/bdb/btree/bt_split.c b/bdb/btree/bt_split.c
index f76337b1944..f3302a6905f 100644
--- a/bdb/btree/bt_split.c
+++ b/bdb/btree/bt_split.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -40,7 +40,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic Exp $";
+static const char revid[] = "$Id: bt_split.c,v 11.58 2002/07/03 19:03:50 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -51,10 +51,10 @@ static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/btree.h"
static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *));
static int __bam_page __P((DBC *, EPG *, EPG *));
@@ -67,21 +67,19 @@ static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *));
* __bam_split --
* Split a page.
*
- * PUBLIC: int __bam_split __P((DBC *, void *));
+ * PUBLIC: int __bam_split __P((DBC *, void *, db_pgno_t *));
*/
int
-__bam_split(dbc, arg)
+__bam_split(dbc, arg, root_pgnop)
DBC *dbc;
void *arg;
+ db_pgno_t *root_pgnop;
{
- BTREE *t;
BTREE_CURSOR *cp;
- DB *dbp;
enum { UP, DOWN } dir;
db_pgno_t root_pgno;
int exact, level, ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
root_pgno = cp->root;
@@ -112,17 +110,20 @@ __bam_split(dbc, arg)
* split. This would be an easy change for this code, but I have no
* numbers that indicate it's worthwhile.
*/
- t = dbp->bt_internal;
for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) {
/*
* Acquire a page and its parent, locked.
*/
if ((ret = (dbc->dbtype == DB_BTREE ?
- __bam_search(dbc, arg, S_WRPAIR, level, NULL, &exact) :
+ __bam_search(dbc, PGNO_INVALID,
+ arg, S_WRPAIR, level, NULL, &exact) :
__bam_rsearch(dbc,
(db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0)
return (ret);
+ if (root_pgnop != NULL)
+ *root_pgnop = cp->csp[0].page->pgno == root_pgno ?
+ root_pgno : cp->csp[-1].page->pgno;
/*
* Split the page if it still needs it (it's possible another
* thread of control has already split the page). If we are
@@ -130,7 +131,7 @@ __bam_split(dbc, arg)
* is no longer necessary.
*/
if (2 * B_MAXSIZEONPAGE(cp->ovflsize)
- <= (db_indx_t)P_FREESPACE(cp->csp[0].page)) {
+ <= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) {
__bam_stkrel(dbc, STK_NOLOCK);
return (0);
}
@@ -178,12 +179,14 @@ __bam_root(dbc, cp)
DB *dbp;
DBT log_dbt;
DB_LSN log_lsn;
+ DB_MPOOLFILE *mpf;
PAGE *lp, *rp;
db_indx_t split;
u_int32_t opflags;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
/* Yeah, right. */
if (cp->page->level >= MAXBTREELEVEL) {
@@ -210,21 +213,22 @@ __bam_root(dbc, cp)
goto err;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
ZERO_LSN(log_lsn);
opflags = F_ISSET(
(BTREE_CURSOR *)dbc->internal, C_RECNUM) ? SPL_NRECS : 0;
- if ((ret = __bam_split_log(dbp->dbenv, dbc->txn,
- &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp),
- PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn,
+ if ((ret = __bam_split_log(dbp,
+ dbc->txn, &LSN(cp->page), 0, PGNO(lp), &LSN(lp), PGNO(rp),
+ &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn,
dbc->internal->root, &log_dbt, opflags)) != 0)
goto err;
- LSN(lp) = LSN(cp->page);
- LSN(rp) = LSN(cp->page);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
+ LSN(lp) = LSN(cp->page);
+ LSN(rp) = LSN(cp->page);
/* Clean up the new root page. */
if ((ret = (dbc->dbtype == DB_RECNO ?
@@ -238,18 +242,18 @@ __bam_root(dbc, cp)
goto err;
/* Success -- write the real pages back to the store. */
- (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, cp->page, DB_MPOOL_DIRTY);
(void)__TLPUT(dbc, cp->lock);
- (void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY);
- (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, lp, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, rp, DB_MPOOL_DIRTY);
return (0);
err: if (lp != NULL)
- (void)__db_free(dbc, lp);
+ (void)mpf->put(mpf, lp, 0);
if (rp != NULL)
- (void)__db_free(dbc, rp);
- (void)memp_fput(dbp->mpf, cp->page, 0);
+ (void)mpf->put(mpf, rp, 0);
+ (void)mpf->put(mpf, cp->page, 0);
(void)__TLPUT(dbc, cp->lock);
return (ret);
}
@@ -267,7 +271,8 @@ __bam_page(dbc, pp, cp)
DBT log_dbt;
DB_LSN log_lsn;
DB *dbp;
- DB_LOCK tplock;
+ DB_LOCK rplock, tplock;
+ DB_MPOOLFILE *mpf;
DB_LSN save_lsn;
PAGE *lp, *rp, *alloc_rp, *tp;
db_indx_t split;
@@ -275,8 +280,10 @@ __bam_page(dbc, pp, cp)
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
alloc_rp = lp = rp = tp = NULL;
- tplock.off = LOCK_INVALID;
+ LOCK_INIT(rplock);
+ LOCK_INIT(tplock);
ret = -1;
/*
@@ -296,7 +303,7 @@ __bam_page(dbc, pp, cp)
* up the tree badly, because we've violated the rule of always locking
* down the tree, and never up.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &rp)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &rp)) != 0)
goto err;
P_INIT(rp, dbp->pgsize, 0,
ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page),
@@ -307,7 +314,7 @@ __bam_page(dbc, pp, cp)
* Create new left page for the split, and fill in everything
* except its LSN and next-page page number.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &lp)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &lp)) != 0)
goto err;
P_INIT(lp, dbp->pgsize, PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : PREV_PGNO(cp->page),
@@ -351,8 +358,7 @@ __bam_page(dbc, pp, cp)
if ((ret = __db_lget(dbc,
0, NEXT_PGNO(cp->page), DB_LOCK_WRITE, 0, &tplock)) != 0)
goto err;
- if ((ret =
- memp_fget(dbp->mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0)
+ if ((ret = mpf->get(mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0)
goto err;
}
@@ -364,6 +370,15 @@ __bam_page(dbc, pp, cp)
goto err;
/*
+ * Lock the new page. We need to do this because someone
+ * could get here through bt_lpgno if this page was recently
+ * dealocated. They can't look at it before we commit.
+ */
+ if ((ret = __db_lget(dbc,
+ 0, PGNO(alloc_rp), DB_LOCK_WRITE, 0, &rplock)) != 0)
+ goto err;
+
+ /*
* Fix up the page numbers we didn't have before. We have to do this
* before calling __bam_pinsert because it may copy a page number onto
* the parent page and it takes the page number from its page argument.
@@ -376,29 +391,30 @@ __bam_page(dbc, pp, cp)
bc = (BTREE_CURSOR *)dbc->internal;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
if (tp == NULL)
ZERO_LSN(log_lsn);
opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0;
- if ((ret = __bam_split_log(dbp->dbenv, dbc->txn,
- &LSN(cp->page), 0, dbp->log_fileid, PGNO(cp->page),
- &LSN(cp->page), PGNO(alloc_rp), &LSN(alloc_rp),
- (u_int32_t)NUM_ENT(lp),
+ if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0,
+ PGNO(cp->page), &LSN(cp->page), PGNO(alloc_rp),
+ &LSN(alloc_rp), (u_int32_t)NUM_ENT(lp),
tp == NULL ? 0 : PGNO(tp),
tp == NULL ? &log_lsn : &LSN(tp),
- bc->root, &log_dbt, opflags)) != 0)
+ PGNO_INVALID, &log_dbt, opflags)) != 0)
goto err;
- /* Update the LSNs for all involved pages. */
- LSN(alloc_rp) = LSN(cp->page);
- LSN(lp) = LSN(cp->page);
- LSN(rp) = LSN(cp->page);
- if (tp != NULL)
- LSN(tp) = LSN(cp->page);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
+
+ /* Update the LSNs for all involved pages. */
+ LSN(alloc_rp) = LSN(cp->page);
+ LSN(lp) = LSN(cp->page);
+ LSN(rp) = LSN(cp->page);
+ if (tp != NULL)
+ LSN(tp) = LSN(cp->page);
/*
* Copy the left and right pages into place. There are two paths
@@ -411,13 +427,13 @@ __bam_page(dbc, pp, cp)
* do the copy.
*/
save_lsn = alloc_rp->lsn;
- memcpy(alloc_rp, rp, LOFFSET(rp));
+ memcpy(alloc_rp, rp, LOFFSET(dbp, rp));
memcpy((u_int8_t *)alloc_rp + HOFFSET(rp),
(u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp));
alloc_rp->lsn = save_lsn;
save_lsn = cp->page->lsn;
- memcpy(cp->page, lp, LOFFSET(lp));
+ memcpy(cp->page, lp, LOFFSET(dbp, lp));
memcpy((u_int8_t *)cp->page + HOFFSET(lp),
(u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
cp->page->lsn = save_lsn;
@@ -431,8 +447,8 @@ __bam_page(dbc, pp, cp)
PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0)
goto err;
- __os_free(lp, dbp->pgsize);
- __os_free(rp, dbp->pgsize);
+ __os_free(dbp->dbenv, lp);
+ __os_free(dbp->dbenv, rp);
/*
* Success -- write the real pages back to the store. As we never
@@ -440,45 +456,43 @@ __bam_page(dbc, pp, cp)
* releasing locks on the pages that reference it. We're finished
* modifying the page so it's not really necessary, but it's neater.
*/
- if ((t_ret =
- memp_fput(dbp->mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ if ((t_ret = mpf->put(mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
- if ((t_ret =
- memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ (void)__TLPUT(dbc, rplock);
+ if ((t_ret = mpf->put(mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, pp->lock);
- if ((t_ret =
- memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ if ((t_ret = mpf->put(mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, cp->lock);
if (tp != NULL) {
if ((t_ret =
- memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ mpf->put(mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, tplock);
}
return (ret);
err: if (lp != NULL)
- __os_free(lp, dbp->pgsize);
+ __os_free(dbp->dbenv, lp);
if (rp != NULL)
- __os_free(rp, dbp->pgsize);
+ __os_free(dbp->dbenv, rp);
if (alloc_rp != NULL)
- (void)__db_free(dbc, alloc_rp);
-
+ (void)mpf->put(mpf, alloc_rp, 0);
if (tp != NULL)
- (void)memp_fput(dbp->mpf, tp, 0);
- if (tplock.off != LOCK_INVALID)
- /* We never updated the next page, we can release it. */
- (void)__LPUT(dbc, tplock);
+ (void)mpf->put(mpf, tp, 0);
+
+ /* We never updated the new or next pages, we can release them. */
+ (void)__LPUT(dbc, rplock);
+ (void)__LPUT(dbc, tplock);
- (void)memp_fput(dbp->mpf, pp->page, 0);
+ (void)mpf->put(mpf, pp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, pp->lock);
else
(void)__TLPUT(dbc, pp->lock);
- (void)memp_fput(dbp->mpf, cp->page, 0);
+ (void)mpf->put(mpf, cp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, cp->lock);
else
@@ -529,7 +543,7 @@ __bam_broot(dbc, rootp, lp, rp)
B_TSET(bi.type, B_KEYDATA, 0);
bi.pgno = lp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(lp);
+ bi.nrecs = __bam_total(dbp, lp);
RE_NREC_SET(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -541,13 +555,13 @@ __bam_broot(dbc, rootp, lp, rp)
switch (TYPE(rp)) {
case P_IBTREE:
/* Copy the first key of the child page onto the root page. */
- child_bi = GET_BINTERNAL(rp, 0);
+ child_bi = GET_BINTERNAL(dbp, rp, 0);
bi.len = child_bi->len;
B_TSET(bi.type, child_bi->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -567,14 +581,14 @@ __bam_broot(dbc, rootp, lp, rp)
case P_LDUP:
case P_LBTREE:
/* Copy the first key of the child page onto the root page. */
- child_bk = GET_BKEYDATA(rp, 0);
+ child_bk = GET_BKEYDATA(dbp, rp, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
bi.len = child_bk->len;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -591,7 +605,7 @@ __bam_broot(dbc, rootp, lp, rp)
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -609,11 +623,11 @@ __bam_broot(dbc, rootp, lp, rp)
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rp->pgno));
+ return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
break;
default:
- return (__db_pgfmt(dbp, rp->pgno));
+ return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
return (0);
}
@@ -647,12 +661,12 @@ __ram_root(dbc, rootp, lp, rp)
/* Insert the left and right keys, set the header information. */
ri.pgno = lp->pgno;
- ri.nrecs = __bam_total(lp);
+ ri.nrecs = __bam_total(dbp, lp);
if ((ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_SET(rootp, ri.nrecs);
ri.pgno = rp->pgno;
- ri.nrecs = __bam_total(rp);
+ ri.nrecs = __bam_total(dbp, rp);
if ((ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_ADJ(rootp, ri.nrecs);
@@ -690,7 +704,8 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
ppage = parent->page;
/* If handling record numbers, count records split to the right page. */
- nrecs = F_ISSET(cp, C_RECNUM) && !space_check ? __bam_total(rchild) : 0;
+ nrecs = F_ISSET(cp, C_RECNUM) &&
+ !space_check ? __bam_total(dbp, rchild) : 0;
/*
* Now we insert the new page's first key into the parent page, which
@@ -721,10 +736,10 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
*/
switch (TYPE(rchild)) {
case P_IBTREE:
- child_bi = GET_BINTERNAL(rchild, 0);
+ child_bi = GET_BINTERNAL(dbp, rchild, 0);
nbytes = BINTERNAL_PSIZE(child_bi->len);
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -753,7 +768,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
break;
case P_LDUP:
case P_LBTREE:
- child_bk = GET_BKEYDATA(rchild, 0);
+ child_bk = GET_BKEYDATA(dbp, rchild, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
/*
@@ -783,7 +798,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
goto noprefix;
if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
goto noprefix;
- tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) -
+ tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) -
(TYPE(lchild) == P_LDUP ? O_INDX : P_INDX));
if (B_TYPE(tmp_bk->type) != B_KEYDATA)
goto noprefix;
@@ -793,13 +808,13 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
memset(&b, 0, sizeof(b));
b.size = child_bk->len;
b.data = child_bk->data;
- nksize = func(dbp, &a, &b);
+ nksize = (u_int32_t)func(dbp, &a, &b);
if ((n = BINTERNAL_PSIZE(nksize)) < nbytes)
nbytes = n;
else
noprefix: nksize = child_bk->len;
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -823,7 +838,7 @@ noprefix: nksize = child_bk->len;
case B_OVERFLOW:
nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -850,14 +865,14 @@ noprefix: nksize = child_bk->len;
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rchild->pgno));
+ return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
break;
case P_IRECNO:
case P_LRECNO:
nbytes = RINTERNAL_PSIZE;
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -873,7 +888,7 @@ noprefix: nksize = child_bk->len;
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rchild->pgno));
+ return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
/*
@@ -882,17 +897,19 @@ noprefix: nksize = child_bk->len;
*/
if (F_ISSET(cp, C_RECNUM)) {
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cadjust_log(dbp->dbenv, dbc->txn,
- &LSN(ppage), 0, dbp->log_fileid, PGNO(ppage),
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cadjust_log(dbp, dbc->txn,
+ &LSN(ppage), 0, PGNO(ppage),
&LSN(ppage), parent->indx, -(int32_t)nrecs, 0)) != 0)
return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(ppage));
/* Update the left page count. */
if (dbc->dbtype == DB_RECNO)
- GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+ GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
else
- GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+ GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
}
return (0);
@@ -911,28 +928,52 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
{
DB *dbp;
PAGE *pp;
- db_indx_t half, nbytes, off, splitp, top;
+ db_indx_t half, *inp, nbytes, off, splitp, top;
int adjust, cnt, iflag, isbigkey, ret;
dbp = dbc->dbp;
pp = cp->page;
+ inp = P_INP(dbp, pp);
adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
/*
* If we're splitting the first (last) page on a level because we're
* inserting (appending) a key to it, it's likely that the data is
* sorted. Moving a single item to the new page is less work and can
- * push the fill factor higher than normal. If we're wrong it's not
- * a big deal, we'll just do the split the right way next time.
+ * push the fill factor higher than normal. This is trivial when we
+ * are splitting a new page before the beginning of the tree; all of
+ * the interesting tests are against values of 0.
+ *
+ * Catching appends to the tree is harder. In a simple append, we're
+ * inserting an item that sorts past the end of the tree; the cursor
+ * will point past the last element on the page. But, in trees with
+ * duplicates, the cursor may point to the last entry on the page --
+ * in this case, the entry will also be the last element of a duplicate
+ * set (the last because the search call specified the S_DUPLAST flag).
+ * The only way to differentiate between an insert immediately before
+ * the last item in a tree or an append after a duplicate set which is
+ * also the last item in the tree is to call the comparison function.
+ * When splitting internal pages during an append, the search code
+ * guarantees the cursor always points to the largest page item less
+ * than the new internal entry. To summarize, we want to catch three
+ * possible index values:
+ *
+ * NUM_ENT(page) Btree/Recno leaf insert past end-of-tree
+ * NUM_ENT(page) - O_INDX Btree or Recno internal insert past EOT
+ * NUM_ENT(page) - P_INDX Btree leaf insert past EOT after a set
+ * of duplicates
+ *
+ * two of which, (NUM_ENT(page) - O_INDX or P_INDX) might be an insert
+ * near the end of the tree, and not after the end of the tree at all.
+ * Do a simple test which might be wrong because calling the comparison
+ * functions is expensive. Regardless, it's not a big deal if we're
+ * wrong, we'll do the split the right way next time.
*/
off = 0;
- if (NEXT_PGNO(pp) == PGNO_INVALID &&
- ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) ||
- (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page))))
- off = NUM_ENT(cp->page) - adjust;
+ if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust)
+ off = NUM_ENT(pp) - adjust;
else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
off = adjust;
-
if (off != 0)
goto sort;
@@ -962,16 +1003,18 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
switch (TYPE(pp)) {
case P_IBTREE:
- if (B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len);
+ if (B_TYPE(
+ GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA)
+ nbytes += BINTERNAL_SIZE(
+ GET_BINTERNAL(dbp, pp, off)->len);
else
nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)
+ nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
@@ -979,9 +1022,10 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)
+ nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
break;
@@ -989,7 +1033,7 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
nbytes += RINTERNAL_SIZE;
break;
default:
- return (__db_pgfmt(dbp, pp->pgno));
+ return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
sort: splitp = off;
@@ -1002,12 +1046,14 @@ sort: splitp = off;
switch (TYPE(pp)) {
case P_IBTREE:
iflag = 1;
- isbigkey = B_TYPE(GET_BINTERNAL(pp, off)->type) != B_KEYDATA;
+ isbigkey =
+ B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA;
break;
case P_LBTREE:
case P_LDUP:
iflag = 0;
- isbigkey = B_TYPE(GET_BKEYDATA(pp, off)->type) != B_KEYDATA;
+ isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) !=
+ B_KEYDATA;
break;
default:
iflag = isbigkey = 0;
@@ -1016,18 +1062,20 @@ sort: splitp = off;
for (cnt = 1; cnt <= 3; ++cnt) {
off = splitp + cnt * adjust;
if (off < (db_indx_t)NUM_ENT(pp) &&
- ((iflag &&
- B_TYPE(GET_BINTERNAL(pp,off)->type) == B_KEYDATA) ||
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)) {
+ ((iflag && B_TYPE(
+ GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) ||
+ B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
- if (iflag ?
- B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA :
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) {
+ if (iflag ? B_TYPE(
+ GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA :
+ B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA) {
splitp = off;
break;
}
@@ -1040,18 +1088,18 @@ sort: splitp = off;
* page set. So, this loop can't be unbounded.
*/
if (TYPE(pp) == P_LBTREE &&
- pp->inp[splitp] == pp->inp[splitp - adjust])
+ inp[splitp] == inp[splitp - adjust])
for (cnt = 1;; ++cnt) {
off = splitp + cnt * adjust;
if (off < NUM_ENT(pp) &&
- pp->inp[splitp] != pp->inp[off]) {
+ inp[splitp] != inp[off]) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
- if (pp->inp[splitp] != pp->inp[off]) {
+ if (inp[splitp] != inp[off]) {
splitp = off + adjust;
break;
}
@@ -1079,18 +1127,20 @@ __bam_copy(dbp, pp, cp, nxt, stop)
PAGE *pp, *cp;
u_int32_t nxt, stop;
{
- db_indx_t nbytes, off;
+ db_indx_t *cinp, nbytes, off, *pinp;
+ cinp = P_INP(dbp, cp);
+ pinp = P_INP(dbp, pp);
/*
- * Copy the rest of the data to the right page. Nxt is the next
- * offset placed on the target page.
+ * Nxt is the offset of the next record to be placed on the target page.
*/
for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
switch (TYPE(pp)) {
case P_IBTREE:
- if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA)
- nbytes =
- BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len);
+ if (B_TYPE(
+ GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA)
+ nbytes = BINTERNAL_SIZE(
+ GET_BINTERNAL(dbp, pp, nxt)->len);
else
nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
@@ -1100,16 +1150,17 @@ __bam_copy(dbp, pp, cp, nxt, stop)
* the offset.
*/
if (off != 0 && (nxt % P_INDX) == 0 &&
- pp->inp[nxt] == pp->inp[nxt - P_INDX]) {
- cp->inp[off] = cp->inp[off - P_INDX];
+ pinp[nxt] == pinp[nxt - P_INDX]) {
+ cinp[off] = cinp[off - P_INDX];
continue;
}
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- if (B_TYPE(GET_BKEYDATA(pp, nxt)->type) == B_KEYDATA)
- nbytes =
- BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) ==
+ B_KEYDATA)
+ nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, nxt)->len);
else
nbytes = BOVERFLOW_SIZE;
break;
@@ -1117,10 +1168,10 @@ __bam_copy(dbp, pp, cp, nxt, stop)
nbytes = RINTERNAL_SIZE;
break;
default:
- return (__db_pgfmt(dbp, pp->pgno));
+ return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
- cp->inp[off] = HOFFSET(cp) -= nbytes;
- memcpy(P_ENTRY(cp, off), P_ENTRY(pp, nxt), nbytes);
+ cinp[off] = HOFFSET(cp) -= nbytes;
+ memcpy(P_ENTRY(dbp, cp, off), P_ENTRY(dbp, pp, nxt), nbytes);
}
return (0);
}
diff --git a/bdb/btree/bt_stat.c b/bdb/btree/bt_stat.c
index 349bb40cf8b..4428de98294 100644
--- a/bdb/btree/bt_stat.c
+++ b/bdb/btree/bt_stat.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic Exp $";
+static const char revid[] = "$Id: bt_stat.c,v 11.52 2002/05/30 15:40:27 krinsky Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,22 +18,22 @@ static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
/*
* __bam_stat --
* Gather/print the btree statistics
*
- * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
+ * PUBLIC: int __bam_stat __P((DB *, void *, u_int32_t));
*/
int
-__bam_stat(dbp, spp, db_malloc, flags)
+__bam_stat(dbp, spp, flags)
DB *dbp;
void *spp;
- void *(*db_malloc) __P((size_t));
u_int32_t flags;
{
BTMETA *meta;
@@ -42,9 +42,10 @@ __bam_stat(dbp, spp, db_malloc, flags)
DBC *dbc;
DB_BTREE_STAT *sp;
DB_LOCK lock, metalock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
- int ret, t_ret;
+ int ret, t_ret, write_meta;
PANIC_CHECK(dbp->dbenv);
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat");
@@ -52,9 +53,12 @@ __bam_stat(dbp, spp, db_malloc, flags)
meta = NULL;
t = dbp->bt_internal;
sp = NULL;
- metalock.off = lock.off = LOCK_INVALID;
+ LOCK_INIT(metalock);
+ LOCK_INIT(lock);
+ mpf = dbp->mpf;
h = NULL;
ret = 0;
+ write_meta = 0;
/* Check for invalid flags. */
if ((ret = __db_statchk(dbp, flags)) != 0)
@@ -68,52 +72,31 @@ __bam_stat(dbp, spp, db_malloc, flags)
DEBUG_LWRITE(dbc, NULL, "bam_stat", NULL, NULL, flags);
/* Allocate and clear the structure. */
- if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0)
+ if ((ret = __os_umalloc(dbp->dbenv, sizeof(*sp), &sp)) != 0)
goto err;
memset(sp, 0, sizeof(*sp));
- /* If the app just wants the record count, make it fast. */
- if (flags == DB_RECORDCOUNT) {
- if ((ret = __db_lget(dbc, 0,
- cp->root, DB_LOCK_READ, 0, &lock)) != 0)
- goto err;
- if ((ret = memp_fget(dbp->mpf,
- &cp->root, 0, (PAGE **)&h)) != 0)
- goto err;
-
- sp->bt_nkeys = RE_NREC(h);
-
- goto done;
- }
- if (flags == DB_CACHED_COUNTS) {
- if ((ret = __db_lget(dbc,
- 0, t->bt_meta, DB_LOCK_READ, 0, &lock)) != 0)
- goto err;
- if ((ret =
- memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0)
- goto err;
- sp->bt_nkeys = meta->dbmeta.key_count;
- sp->bt_ndata = meta->dbmeta.record_count;
-
- goto done;
- }
-
/* Get the metadata page for the entire database. */
pgno = PGNO_BASE_MD;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0)
goto err;
+ if (flags == DB_RECORDCOUNT || flags == DB_CACHED_COUNTS)
+ flags = DB_FAST_STAT;
+ if (flags == DB_FAST_STAT)
+ goto meta_only;
+
/* Walk the metadata free list, counting pages. */
for (sp->bt_free = 0, pgno = meta->dbmeta.free; pgno != PGNO_INVALID;) {
++sp->bt_free;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
pgno = h->next_pgno;
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
goto err;
h = NULL;
}
@@ -122,14 +105,14 @@ __bam_stat(dbp, spp, db_malloc, flags)
pgno = cp->root;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
/* Get the levels from the root page. */
sp->bt_levels = h->level;
/* Discard the root page. */
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
goto err;
h = NULL;
__LPUT(dbc, lock);
@@ -143,20 +126,36 @@ __bam_stat(dbp, spp, db_malloc, flags)
* Get the subdatabase metadata page if it's not the same as the
* one we already have.
*/
- if (t->bt_meta != PGNO_BASE_MD || !F_ISSET(dbp, DB_AM_RDONLY)) {
- if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0)
+ write_meta = !F_ISSET(dbp, DB_AM_RDONLY);
+meta_only:
+ if (t->bt_meta != PGNO_BASE_MD || write_meta != 0) {
+ if ((ret = mpf->put(mpf, meta, 0)) != 0)
goto err;
meta = NULL;
__LPUT(dbc, metalock);
if ((ret = __db_lget(dbc,
- 0, t->bt_meta, F_ISSET(dbp, DB_AM_RDONLY) ?
+ 0, t->bt_meta, write_meta == 0 ?
DB_LOCK_READ : DB_LOCK_WRITE, 0, &metalock)) != 0)
goto err;
- if ((ret =
- memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0)
+ if ((ret = mpf->get(mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0)
goto err;
}
+ if (flags == DB_FAST_STAT) {
+ if (dbp->type == DB_RECNO ||
+ (dbp->type == DB_BTREE && F_ISSET(dbp, DB_AM_RECNUM))) {
+ if ((ret = __db_lget(dbc, 0,
+ cp->root, DB_LOCK_READ, 0, &lock)) != 0)
+ goto err;
+ if ((ret =
+ mpf->get(mpf, &cp->root, 0, (PAGE **)&h)) != 0)
+ goto err;
+
+ sp->bt_nkeys = RE_NREC(h);
+ } else
+ sp->bt_nkeys = meta->dbmeta.key_count;
+ sp->bt_ndata = meta->dbmeta.record_count;
+ }
/* Get metadata page statistics. */
sp->bt_metaflags = meta->dbmeta.flags;
@@ -167,39 +166,33 @@ __bam_stat(dbp, spp, db_malloc, flags)
sp->bt_pagesize = meta->dbmeta.pagesize;
sp->bt_magic = meta->dbmeta.magic;
sp->bt_version = meta->dbmeta.version;
- if (!F_ISSET(dbp, DB_AM_RDONLY)) {
+
+ if (write_meta != 0) {
meta->dbmeta.key_count = sp->bt_nkeys;
meta->dbmeta.record_count = sp->bt_ndata;
}
- /* Discard the metadata page. */
- if ((ret = memp_fput(dbp->mpf,
- meta, F_ISSET(dbp, DB_AM_RDONLY) ? 0 : DB_MPOOL_DIRTY)) != 0)
- goto err;
- meta = NULL;
- __LPUT(dbc, metalock);
-
-done: *(DB_BTREE_STAT **)spp = sp;
-
- if (0) {
-err: if (sp != NULL)
- __os_free(sp, sizeof(*sp));
- }
+ *(DB_BTREE_STAT **)spp = sp;
- if (h != NULL &&
- (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+err: /* Discard the second page. */
+ __LPUT(dbc, lock);
+ if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
- if (meta != NULL &&
- (t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 && ret == 0)
+ /* Discard the metadata page. */
+ __LPUT(dbc, metalock);
+ if (meta != NULL && (t_ret = mpf->put(
+ mpf, meta, write_meta == 0 ? 0 : DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
- if (lock.off != LOCK_INVALID)
- __LPUT(dbc, lock);
-
if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
+ if (ret != 0 && sp != NULL) {
+ __os_ufree(dbp->dbenv, sp);
+ *(DB_BTREE_STAT **)spp = NULL;
+ }
+
return (ret);
}
@@ -222,22 +215,27 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
BKEYDATA *bk;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_indx_t indx;
int already_put, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ already_put = 0;
if ((ret = __db_lget(dbc, 0, root_pgno, mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0)
- goto err;
+ if ((ret = mpf->get(mpf, &root_pgno, 0, &h)) != 0) {
+ __LPUT(dbc, lock);
+ return (ret);
+ }
switch (TYPE(h)) {
case P_IBTREE:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (B_TYPE(bi->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
((BOVERFLOW *)bi->data)->pgno,
@@ -245,34 +243,34 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
goto err;
if ((ret = __bam_traverse(
dbc, mode, bi->pgno, callback, cookie)) != 0)
- break;
+ goto err;
}
break;
case P_IRECNO:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- ri = GET_RINTERNAL(h, indx);
+ ri = GET_RINTERNAL(dbp, h, indx);
if ((ret = __bam_traverse(
dbc, mode, ri->pgno, callback, cookie)) != 0)
- break;
+ goto err;
}
break;
case P_LBTREE:
for (indx = 0; indx < NUM_ENT(h); indx += P_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx)->pgno,
+ GET_BOVERFLOW(dbp, h, indx)->pgno,
callback, cookie)) != 0)
goto err;
- bk = GET_BKEYDATA(h, indx + O_INDX);
+ bk = GET_BKEYDATA(dbp, h, indx + O_INDX);
if (B_TYPE(bk->type) == B_DUPLICATE &&
(ret = __bam_traverse(dbc, mode,
- GET_BOVERFLOW(h, indx + O_INDX)->pgno,
+ GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno,
callback, cookie)) != 0)
goto err;
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx + O_INDX)->pgno,
+ GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno,
callback, cookie)) != 0)
goto err;
}
@@ -280,22 +278,19 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
case P_LDUP:
case P_LRECNO:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx)->pgno,
+ GET_BOVERFLOW(dbp, h, indx)->pgno,
callback, cookie)) != 0)
goto err;
}
break;
}
- already_put = 0;
- if ((ret = callback(dbp, h, cookie, &already_put)) != 0)
- goto err;
+ ret = callback(dbp, h, cookie, &already_put);
-err: if (!already_put &&
- (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret != 0)
+err: if (!already_put && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret != 0)
ret = t_ret;
__LPUT(dbc, lock);
@@ -316,33 +311,34 @@ __bam_stat_callback(dbp, h, cookie, putp)
int *putp;
{
DB_BTREE_STAT *sp;
- db_indx_t indx, top;
+ db_indx_t indx, *inp, top;
u_int8_t type;
sp = cookie;
*putp = 0;
top = NUM_ENT(h);
+ inp = P_INP(dbp, h);
switch (TYPE(h)) {
case P_IBTREE:
case P_IRECNO:
++sp->bt_int_pg;
- sp->bt_int_pgfree += P_FREESPACE(h);
+ sp->bt_int_pgfree += P_FREESPACE(dbp, h);
break;
case P_LBTREE:
/* Correct for on-page duplicates and deleted items. */
for (indx = 0; indx < top; indx += P_INDX) {
if (indx + P_INDX >= top ||
- h->inp[indx] != h->inp[indx + P_INDX])
+ inp[indx] != inp[indx + P_INDX])
++sp->bt_nkeys;
- type = GET_BKEYDATA(h, indx + O_INDX)->type;
+ type = GET_BKEYDATA(dbp, h, indx + O_INDX)->type;
if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE)
++sp->bt_ndata;
}
++sp->bt_leaf_pg;
- sp->bt_leaf_pgfree += P_FREESPACE(h);
+ sp->bt_leaf_pgfree += P_FREESPACE(dbp, h);
break;
case P_LRECNO:
/*
@@ -356,39 +352,39 @@ __bam_stat_callback(dbp, h, cookie, putp)
* Correct for deleted items in non-renumbering
* Recno databases.
*/
- if (F_ISSET(dbp, DB_RE_RENUMBER))
+ if (F_ISSET(dbp, DB_AM_RENUMBER))
sp->bt_ndata += top;
else
for (indx = 0; indx < top; indx += O_INDX) {
- type = GET_BKEYDATA(h, indx)->type;
+ type = GET_BKEYDATA(dbp, h, indx)->type;
if (!B_DISSET(type))
++sp->bt_ndata;
}
++sp->bt_leaf_pg;
- sp->bt_leaf_pgfree += P_FREESPACE(h);
+ sp->bt_leaf_pgfree += P_FREESPACE(dbp, h);
} else {
sp->bt_ndata += top;
++sp->bt_dup_pg;
- sp->bt_dup_pgfree += P_FREESPACE(h);
+ sp->bt_dup_pgfree += P_FREESPACE(dbp, h);
}
break;
case P_LDUP:
/* Correct for deleted items. */
for (indx = 0; indx < top; indx += O_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx)->type))
+ if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
++sp->bt_ndata;
++sp->bt_dup_pg;
- sp->bt_dup_pgfree += P_FREESPACE(h);
+ sp->bt_dup_pgfree += P_FREESPACE(dbp, h);
break;
case P_OVERFLOW:
++sp->bt_over_pg;
- sp->bt_over_pgfree += P_OVFLSPACE(dbp->pgsize, h);
+ sp->bt_over_pgfree += P_OVFLSPACE(dbp, dbp->pgsize, h);
break;
default:
- return (__db_pgfmt(dbp, h->pgno));
+ return (__db_pgfmt(dbp->dbenv, h->pgno));
}
return (0);
}
@@ -421,13 +417,18 @@ __bam_key_range(dbp, txn, dbt, kp, flags)
if (flags != 0)
return (__db_ferr(dbp->dbenv, "DB->key_range", 0));
+ /* Check for consistent transaction usage. */
+ if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
+ return (ret);
+
/* Acquire a cursor. */
if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
return (ret);
DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
- if ((ret = __bam_search(dbc, dbt, S_STK_ONLY, 1, NULL, &exact)) != 0)
+ if ((ret = __bam_search(dbc, PGNO_INVALID,
+ dbt, S_STK_ONLY, 1, NULL, &exact)) != 0)
goto err;
cp = (BTREE_CURSOR *)dbc->internal;
diff --git a/bdb/btree/bt_upgrade.c b/bdb/btree/bt_upgrade.c
index 4032dba3b36..9f92648d739 100644
--- a/bdb/btree/bt_upgrade.c
+++ b/bdb/btree/bt_upgrade.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_upgrade.c,v 11.19 2000/11/30 00:58:29 ubell Exp $";
+static const char revid[] = "$Id: bt_upgrade.c,v 11.25 2002/08/06 06:11:13 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,11 +18,9 @@ static const char revid[] = "$Id: bt_upgrade.c,v 11.19 2000/11/30 00:58:29 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-#include "db_am.h"
-#include "db_upgrade.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_am.h"
+#include "dbinc/db_upgrade.h"
/*
* __bam_30_btreemeta --
@@ -107,7 +105,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp)
newmeta->minkey = oldmeta->minkey;
newmeta->maxkey = oldmeta->maxkey;
memmove(newmeta->dbmeta.uid,
- oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid));
+ oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid));
newmeta->dbmeta.flags = oldmeta->dbmeta.flags;
newmeta->dbmeta.record_count = 0;
newmeta->dbmeta.key_count = 0;
@@ -126,7 +124,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp)
/*
* __bam_31_lbtree --
- * Upgrade the database btree leaf pages.
+ * Upgrade the database btree leaf pages.
*
* PUBLIC: int __bam_31_lbtree
* PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
@@ -147,15 +145,15 @@ __bam_31_lbtree(dbp, real_name, flags, fhp, h, dirtyp)
ret = 0;
for (indx = O_INDX; indx < NUM_ENT(h); indx += P_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_DUPLICATE) {
- pgno = GET_BOVERFLOW(h, indx)->pgno;
+ pgno = GET_BOVERFLOW(dbp, h, indx)->pgno;
if ((ret = __db_31_offdup(dbp, real_name, fhp,
LF_ISSET(DB_DUPSORT) ? 1 : 0, &pgno)) != 0)
break;
- if (pgno != GET_BOVERFLOW(h, indx)->pgno) {
+ if (pgno != GET_BOVERFLOW(dbp, h, indx)->pgno) {
*dirtyp = 1;
- GET_BOVERFLOW(h, indx)->pgno = pgno;
+ GET_BOVERFLOW(dbp, h, indx)->pgno = pgno;
}
}
}
diff --git a/bdb/btree/bt_verify.c b/bdb/btree/bt_verify.c
index 9f8647e7e2a..0cf8a47e476 100644
--- a/bdb/btree/bt_verify.c
+++ b/bdb/btree/bt_verify.c
@@ -1,16 +1,16 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2002
* Sleepycat Software. All rights reserved.
*
- * $Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $
+ * $Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $";
+static const char revid[] = "$Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -20,9 +20,9 @@ static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_verify.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/btree.h"
static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *));
static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
@@ -79,15 +79,15 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
/* avoid division by zero */
ovflsize = meta->minkey > 0 ?
- B_MINKEY_TO_OVFLSIZE(meta->minkey, dbp->pgsize) : 0;
+ B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
if (meta->minkey < 2 ||
- ovflsize > B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) {
+ ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
pip->bt_minkey = 0;
isbad = 1;
EPRINT((dbp->dbenv,
- "Nonsensical bt_minkey value %lu on metadata page %lu",
- (u_long)meta->minkey, (u_long)pgno));
+ "Page %lu: nonsensical bt_minkey value %lu on metadata page",
+ (u_long)pgno, (u_long)meta->minkey));
} else
pip->bt_minkey = meta->minkey;
@@ -103,13 +103,13 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
* of the file, then the root page had better be page 1.
*/
pip->root = 0;
- if (meta->root == PGNO_INVALID
- || meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
+ if (meta->root == PGNO_INVALID ||
+ meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
(pgno == PGNO_BASE_MD && meta->root != 1)) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Nonsensical root page %lu on metadata page %lu",
- (u_long)meta->root, (u_long)vdp->last_pgno));
+ "Page %lu: nonsensical root page %lu on metadata page",
+ (u_long)pgno, (u_long)meta->root));
} else
pip->root = meta->root;
@@ -125,7 +125,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Btree metadata page %lu has both duplicates and multiple databases",
+"Page %lu: Btree metadata page has both duplicates and multiple databases",
(u_long)pgno));
}
F_SET(pip, VRFY_HAS_SUBDBS);
@@ -139,7 +139,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
F_SET(pip, VRFY_HAS_RECNUMS);
if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
EPRINT((dbp->dbenv,
- "Btree metadata page %lu illegally has both recnums and dups",
+ "Page %lu: Btree metadata page illegally has both recnums and dups",
(u_long)pgno));
isbad = 1;
}
@@ -150,13 +150,13 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
} else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Metadata page %lu has renumber flag set but is not recno",
+ "Page %lu: metadata page has renumber flag set but is not recno",
(u_long)pgno));
}
if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
EPRINT((dbp->dbenv,
- "Recno metadata page %lu specifies duplicates",
+ "Page %lu: recno metadata page specifies duplicates",
(u_long)pgno));
isbad = 1;
}
@@ -170,8 +170,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
*/
isbad = 1;
EPRINT((dbp->dbenv,
- "re_len of %lu in non-fixed-length database",
- (u_long)pip->re_len));
+ "Page %lu: re_len of %lu in non-fixed-length database",
+ (u_long)pgno, (u_long)pip->re_len));
}
/*
@@ -179,7 +179,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
* not be and may still be correct.
*/
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -242,7 +243,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
if (F_ISSET(pip, VRFY_HAS_DUPS)) {
EPRINT((dbp->dbenv,
- "Recno database has dups on page %lu", (u_long)pgno));
+ "Page %lu: Recno database has dups", (u_long)pgno));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -255,7 +256,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
*/
re_len_guess = 0;
for (i = 0; i < NUM_ENT(h); i++) {
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
/* KEYEMPTY. Go on. */
if (B_DISSET(bk->type))
continue;
@@ -266,8 +267,8 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
else {
isbad = 1;
EPRINT((dbp->dbenv,
- "Nonsensical type for item %lu, page %lu",
- (u_long)i, (u_long)pgno));
+ "Page %lu: nonsensical type for item %lu",
+ (u_long)pgno, (u_long)i));
continue;
}
if (re_len_guess == 0)
@@ -288,9 +289,10 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
/* Save off record count. */
pip->rec_cnt = NUM_ENT(h);
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
/*
@@ -362,7 +364,7 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
else
goto err;
EPRINT((dbp->dbenv,
- "item order check on page %lu unsafe: skipping",
+ "Page %lu: item order check unsafe: skipping",
(u_long)pgno));
} else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
__bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) {
@@ -377,9 +379,10 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
goto err;
}
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
/*
@@ -403,6 +406,7 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
VRFY_PAGEINFO *pip;
int ret, t_ret, isbad;
u_int32_t himark, i, offset, nentries;
+ db_indx_t *inp;
u_int8_t *pagelayout, *p;
isbad = 0;
@@ -422,30 +426,31 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
himark = dbp->pgsize;
if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pagelayout)) != 0)
+ __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0)
goto err;
memset(pagelayout, 0, dbp->pgsize);
+ inp = P_INP(dbp, h);
for (i = 0; i < NUM_ENT(h); i++) {
- if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + himark) {
+ if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
EPRINT((dbp->dbenv,
- "Page %lu entries listing %lu overlaps data",
+ "Page %lu: entries listing %lu overlaps data",
(u_long)pgno, (u_long)i));
ret = DB_VERIFY_BAD;
goto err;
}
- offset = h->inp[i];
+ offset = inp[i];
/*
* Check that the item offset is reasonable: it points
* somewhere after the inp array and before the end of the
* page.
*/
- if (offset <= (u_int32_t)((u_int8_t *)h->inp + i -
+ if (offset <= (u_int32_t)((u_int8_t *)inp + i -
(u_int8_t *)h) ||
offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Bad offset %lu at page %lu index %lu",
- (u_long)offset, (u_long)pgno, (u_long)i));
+ "Page %lu: bad offset %lu at index %lu",
+ (u_long)pgno, (u_long)offset, (u_long)i));
continue;
}
@@ -456,7 +461,7 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
nentries++;
/* Make sure this RINTERNAL is not multiply referenced. */
- ri = GET_RINTERNAL(h, i);
+ ri = GET_RINTERNAL(dbp, h, i);
if (pagelayout[offset] == 0) {
pagelayout[offset] = 1;
child.pgno = ri->pgno;
@@ -466,8 +471,8 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
goto err;
} else {
EPRINT((dbp->dbenv,
- "RINTERNAL structure at offset %lu, page %lu referenced twice",
- (u_long)offset, (u_long)pgno));
+ "Page %lu: RINTERNAL structure at offset %lu referenced twice",
+ (u_long)pgno, (u_long)offset));
isbad = 1;
}
}
@@ -477,23 +482,25 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
p += RINTERNAL_SIZE)
if (*p != 1) {
EPRINT((dbp->dbenv,
- "Gap between items at offset %lu, page %lu",
- (u_long)(p - pagelayout), (u_long)pgno));
+ "Page %lu: gap between items at offset %lu",
+ (u_long)pgno, (u_long)(p - pagelayout)));
isbad = 1;
}
if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
- (u_long)(HOFFSET(h)), (u_long)himark));
+ EPRINT((dbp->dbenv,
+ "Page %lu: bad HOFFSET %lu, appears to be %lu",
+ (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark));
isbad = 1;
}
*nentriesp = nentries;
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
if (pagelayout != NULL)
- __os_free(pagelayout, dbp->pgsize);
+ __os_free(dbp->dbenv, pagelayout);
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -558,22 +565,24 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* it and the region immediately after it.
*/
himark = dbp->pgsize;
- if ((ret = __os_malloc(dbp->dbenv,
- dbp->pgsize, NULL, &pagelayout)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0)
goto err;
memset(pagelayout, 0, dbp->pgsize);
for (i = 0; i < NUM_ENT(h); i++) {
-
- ret = __db_vrfy_inpitem(dbp,
- h, pgno, i, 1, flags, &himark, &offset);
- if (ret == DB_VERIFY_BAD) {
+ switch (ret = __db_vrfy_inpitem(dbp,
+ h, pgno, i, 1, flags, &himark, &offset)) {
+ case 0:
+ break;
+ case DB_VERIFY_BAD:
isbad = 1;
continue;
- } else if (ret == DB_VERIFY_FATAL) {
+ case DB_VERIFY_FATAL:
isbad = 1;
goto err;
- } else if (ret != 0)
- DB_ASSERT(0);
+ default:
+ DB_ASSERT(ret != 0);
+ break;
+ }
/*
* We now have a plausible beginning for the item, and we know
@@ -582,7 +591,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* Mark the beginning and end in pagelayout so we can make sure
* items have no overlaps or gaps.
*/
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
#define ITEM_BEGIN 1
#define ITEM_END 2
if (pagelayout[offset] == 0)
@@ -609,8 +618,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
} else {
isbad = 1;
EPRINT((dbp->dbenv,
- "Duplicated item %lu on page %lu",
- (u_long)i, (u_long)pgno));
+ "Page %lu: duplicated item %lu",
+ (u_long)pgno, (u_long)i));
}
}
@@ -662,8 +671,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
*/
if (isdupitem && pagelayout[endoff] != ITEM_END) {
EPRINT((dbp->dbenv,
- "Duplicated item %lu on page %lu",
- (u_long)i, (u_long)pgno));
+ "Page %lu: duplicated item %lu",
+ (u_long)pgno, (u_long)i));
isbad = 1;
} else if (pagelayout[endoff] == 0)
pagelayout[endoff] = ITEM_END;
@@ -676,8 +685,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Item %lu on page %lu marked deleted",
- (u_long)i, (u_long)pgno));
+ "Page %lu: item %lu marked deleted",
+ (u_long)pgno, (u_long)i));
}
/*
@@ -696,13 +705,13 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if (TYPE(h) == P_IBTREE) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Duplicate page referenced by internal btree page %lu at item %lu",
+ "Page %lu: duplicate page referenced by internal btree page at item %lu",
(u_long)pgno, (u_long)i));
break;
} else if (TYPE(h) == P_LRECNO) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Duplicate page referenced by recno page %lu at item %lu",
+ "Page %lu: duplicate page referenced by recno page at item %lu",
(u_long)pgno, (u_long)i));
break;
}
@@ -717,9 +726,9 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Impossible tlen %lu, item %lu, page %lu",
- (u_long)bo->tlen, (u_long)i,
- (u_long)pgno));
+ "Page %lu: impossible tlen %lu, item %lu",
+ (u_long)pgno,
+ (u_long)bo->tlen, (u_long)i));
/* Don't save as a child. */
break;
}
@@ -728,8 +737,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
bo->pgno == PGNO_INVALID) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Offpage item %lu, page %lu has bad pgno",
- (u_long)i, (u_long)pgno));
+ "Page %lu: offpage item %lu has bad pgno %lu",
+ (u_long)pgno, (u_long)i, (u_long)bo->pgno));
/* Don't save as a child. */
break;
}
@@ -744,8 +753,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
default:
isbad = 1;
EPRINT((dbp->dbenv,
- "Item %lu on page %lu of invalid type %lu",
- (u_long)i, (u_long)pgno));
+ "Page %lu: item %lu of invalid type %lu",
+ (u_long)pgno, (u_long)i));
break;
}
}
@@ -765,7 +774,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
isbad = 1;
EPRINT((dbp->dbenv,
- "Gap between items, page %lu offset %lu",
+ "Page %lu: gap between items at offset %lu",
(u_long)pgno, (u_long)i));
/* Find the end of the gap */
for ( ; pagelayout[i + 1] == 0 &&
@@ -777,8 +786,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if (i != ALIGN(i, sizeof(u_int32_t))) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Offset %lu page %lu unaligned",
- (u_long)i, (u_long)pgno));
+ "Page %lu: offset %lu unaligned",
+ (u_long)pgno, (u_long)i));
}
initem = 1;
nentries++;
@@ -791,7 +800,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
*/
isbad = 1;
EPRINT((dbp->dbenv,
- "Overlapping items, page %lu offset %lu",
+ "Page %lu: overlapping items at offset %lu",
(u_long)pgno, (u_long)i));
break;
default:
@@ -816,24 +825,26 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
*/
isbad = 1;
EPRINT((dbp->dbenv,
- "Overlapping items, page %lu offset %lu",
+ "Page %lu: overlapping items at offset %lu",
(u_long)pgno, (u_long)i));
break;
}
- (void)__os_free(pagelayout, dbp->pgsize);
+ (void)__os_free(dbp->dbenv, pagelayout);
/* Verify HOFFSET. */
if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
- (u_long)HOFFSET(h), (u_long)himark));
+ EPRINT((dbp->dbenv,
+ "Page %lu: bad HOFFSET %lu, appears to be %lu",
+ (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark));
isbad = 1;
}
err: if (nentriesp != NULL)
*nentriesp = nentries;
- if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+ if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
@@ -865,14 +876,14 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
int ovflok, hasdups;
u_int32_t flags;
{
- DBT dbta, dbtb, dup1, dup2, *p1, *p2, *tmp;
+ DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
BTREE *bt;
BINTERNAL *bi;
BKEYDATA *bk;
BOVERFLOW *bo;
VRFY_PAGEINFO *pip;
db_indx_t i;
- int cmp, freedup1, freedup2, isbad, ret, t_ret;
+ int cmp, freedup_1, freedup_2, isbad, ret, t_ret;
int (*dupfunc) __P((DB *, const DBT *, const DBT *));
int (*func) __P((DB *, const DBT *, const DBT *));
void *buf1, *buf2, *tmpbuf;
@@ -949,7 +960,7 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
*/
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, i);
+ bi = GET_BINTERNAL(dbp, h, i);
if (B_TYPE(bi->type) == B_OVERFLOW) {
bo = (BOVERFLOW *)(bi->data);
goto overflow;
@@ -972,14 +983,14 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
if (i == 0 && bi->len != 0) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Lowest key on internal page %lu of nonzero length",
+ "Page %lu: lowest key on internal page of nonzero length",
(u_long)pgno));
}
#endif
break;
case P_LBTREE:
case P_LDUP:
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (B_TYPE(bk->type) == B_OVERFLOW) {
bo = (BOVERFLOW *)bk;
goto overflow;
@@ -1030,8 +1041,8 @@ overflow: if (!ovflok) {
p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Error %lu in fetching overflow item %lu, page %lu",
- (u_long)ret, (u_long)i, (u_long)pgno));
+ "Page %lu: error %lu in fetching overflow item %lu",
+ (u_long)pgno, (u_long)ret, (u_long)i));
}
/* In case it got realloc'ed and thus changed. */
buf2 = p2->data;
@@ -1045,7 +1056,7 @@ overflow: if (!ovflok) {
if (cmp > 0) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Out-of-order key, page %lu item %lu",
+ "Page %lu: out-of-order key at entry %lu",
(u_long)pgno, (u_long)i));
/* proceed */
} else if (cmp == 0) {
@@ -1060,7 +1071,7 @@ overflow: if (!ovflok) {
else if (hasdups == 0) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Database with no duplicates has duplicated keys on page %lu",
+ "Page %lu: database with no duplicates has duplicated keys",
(u_long)pgno));
}
@@ -1092,11 +1103,11 @@ overflow: if (!ovflok) {
* dups are probably (?) rare.
*/
if (((ret = __bam_safe_getdata(dbp,
- h, i - 1, ovflok, &dup1,
- &freedup1)) != 0) ||
+ h, i - 1, ovflok, &dup_1,
+ &freedup_1)) != 0) ||
((ret = __bam_safe_getdata(dbp,
- h, i + 1, ovflok, &dup2,
- &freedup2)) != 0))
+ h, i + 1, ovflok, &dup_2,
+ &freedup_2)) != 0))
goto err;
/*
@@ -1105,8 +1116,8 @@ overflow: if (!ovflok) {
* it's not safe to chase them now.
* Mark an incomplete and return.
*/
- if (dup1.data == NULL ||
- dup2.data == NULL) {
+ if (dup_1.data == NULL ||
+ dup_2.data == NULL) {
DB_ASSERT(!ovflok);
F_SET(pip, VRFY_INCOMPLETE);
goto err;
@@ -1118,26 +1129,28 @@ overflow: if (!ovflok) {
* until we do the structure check
* and see whether DUPSORT is set.
*/
- if (dupfunc(dbp, &dup1, &dup2) > 0)
+ if (dupfunc(dbp, &dup_1, &dup_2) > 0)
F_SET(pip, VRFY_DUPS_UNSORTED);
- if (freedup1)
- __os_free(dup1.data, 0);
- if (freedup2)
- __os_free(dup2.data, 0);
+ if (freedup_1)
+ __os_ufree(dbp->dbenv,
+ dup_1.data);
+ if (freedup_2)
+ __os_ufree(dbp->dbenv,
+ dup_2.data);
}
}
}
}
-err: if (pip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && ret == 0)
+err: if (pip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0)
ret = t_ret;
if (buf1 != NULL)
- __os_free(buf1, 0);
+ __os_ufree(dbp->dbenv, buf1);
if (buf2 != NULL)
- __os_free(buf2, 0);
+ __os_ufree(dbp->dbenv, buf2);
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -1173,7 +1186,7 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
goto err;
if (p != 0) {
EPRINT((dbp->dbenv,
- "Btree metadata page number %lu observed twice",
+ "Page %lu: btree metadata page observed twice",
(u_long)meta_pgno));
ret = DB_VERIFY_BAD;
goto err;
@@ -1185,7 +1198,8 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
if (root == 0) {
EPRINT((dbp->dbenv,
- "Btree metadata page %lu has no root", (u_long)meta_pgno));
+ "Page %lu: btree metadata page has no root",
+ (u_long)meta_pgno));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -1222,7 +1236,7 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
*/
if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
EPRINT((dbp->dbenv,
- "Recno database with meta page %lu has bad re_len %lu",
+ "Page %lu: recno database has bad re_len %lu",
(u_long)meta_pgno, (u_long)relen));
ret = DB_VERIFY_BAD;
goto err;
@@ -1231,24 +1245,24 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
break;
case P_LDUP:
EPRINT((dbp->dbenv,
- "Duplicate tree referenced from metadata page %lu",
+ "Page %lu: duplicate tree referenced from metadata page",
(u_long)meta_pgno));
ret = DB_VERIFY_BAD;
break;
default:
EPRINT((dbp->dbenv,
- "Btree root of incorrect type %lu on meta page %lu",
- (u_long)rip->type, (u_long)meta_pgno));
+ "Page %lu: btree root of incorrect type %lu on metadata page",
+ (u_long)meta_pgno, (u_long)rip->type));
ret = DB_VERIFY_BAD;
break;
}
-err: if (mip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && ret == 0)
- t_ret = ret;
- if (rip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, rip)) != 0) && ret == 0)
- t_ret = ret;
+err: if (mip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0)
+ ret = t_ret;
+ if (rip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, rip)) != 0) && ret == 0)
+ ret = t_ret;
return (ret);
}
@@ -1273,20 +1287,26 @@ __bam_vrfy_subtree(dbp,
{
BINTERNAL *li, *ri, *lp, *rp;
DB *pgset;
+ DB_MPOOLFILE *mpf;
DBC *cc;
PAGE *h;
VRFY_CHILDINFO *child;
VRFY_PAGEINFO *pip;
- db_recno_t nrecs, child_nrecs;
db_indx_t i;
- int ret, t_ret, isbad, toplevel, p;
+ db_pgno_t next_pgno, prev_pgno;
+ db_recno_t child_nrecs, nrecs;
+ u_int32_t child_level, child_relen, level, relen, stflags;
+ u_int8_t leaf_type;
int (*func) __P((DB *, const DBT *, const DBT *));
- u_int32_t level, child_level, stflags, child_relen, relen;
+ int isbad, p, ret, t_ret, toplevel;
+ mpf = dbp->mpf;
ret = isbad = 0;
nrecs = 0;
h = NULL;
relen = 0;
+ leaf_type = P_INVALID;
+ next_pgno = prev_pgno = PGNO_INVALID;
rp = (BINTERNAL *)r;
lp = (BINTERNAL *)l;
@@ -1300,10 +1320,33 @@ __bam_vrfy_subtree(dbp,
cc = NULL;
level = pip->bt_level;
- toplevel = LF_ISSET(ST_TOPLEVEL);
+ toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0;
LF_CLR(ST_TOPLEVEL);
/*
+ * If this is the root, initialize the vdp's prev- and next-pgno
+ * accounting.
+ *
+ * For each leaf page we hit, we'll want to make sure that
+ * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
+ * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno
+ * and vdp->prev_pgno to our page number, and the next leaf page in
+ * line should be able to do the same verification.
+ */
+ if (toplevel) {
+ /*
+ * Cache the values stored in the vdp so that if we're an
+ * auxiliary tree such as an off-page duplicate set, our
+ * caller's leaf page chain doesn't get lost.
+ */
+ prev_pgno = vdp->prev_pgno;
+ next_pgno = vdp->next_pgno;
+ leaf_type = vdp->leaf_type;
+ vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
+ vdp->leaf_type = P_INVALID;
+ }
+
+ /*
* We are recursively descending a btree, starting from the root
* and working our way out to the leaves.
*
@@ -1333,8 +1376,53 @@ __bam_vrfy_subtree(dbp,
case P_LDUP:
case P_LBTREE:
/*
- * Cases 1, 2 and 3 (overflow pages are common to all three);
- * traverse child list, looking for overflows.
+ * Cases 1, 2 and 3.
+ *
+ * We're some sort of leaf page; verify
+ * that our linked list of leaves is consistent.
+ */
+ if (vdp->leaf_type == P_INVALID) {
+ /*
+ * First leaf page. Set the type that all its
+ * successors should be, and verify that our prev_pgno
+ * is PGNO_INVALID.
+ */
+ vdp->leaf_type = pip->type;
+ if (pip->prev_pgno != PGNO_INVALID)
+ goto bad_prev;
+ } else {
+ /*
+ * Successor leaf page. Check our type, the previous
+ * page's next_pgno, and our prev_pgno.
+ */
+ if (pip->type != vdp->leaf_type) {
+ EPRINT((dbp->dbenv,
+ "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
+ (u_long)pip->pgno, (u_long)pip->type,
+ (u_long)vdp->leaf_type));
+ isbad = 1;
+ }
+ if (pip->pgno != vdp->next_pgno) {
+ EPRINT((dbp->dbenv,
+ "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
+ (u_long)vdp->prev_pgno,
+ (u_long)vdp->next_pgno, (u_long)pip->pgno));
+ isbad = 1;
+ }
+ if (pip->prev_pgno != vdp->prev_pgno) {
+bad_prev: EPRINT((dbp->dbenv,
+ "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
+ (u_long)pip->pgno, (u_long)pip->prev_pgno,
+ (u_long)vdp->prev_pgno));
+ isbad = 1;
+ }
+ }
+ vdp->prev_pgno = pip->pgno;
+ vdp->next_pgno = pip->next_pgno;
+
+ /*
+ * Overflow pages are common to all three leaf types;
+ * traverse the child list, looking for overflows.
*/
if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
goto err;
@@ -1360,7 +1448,7 @@ __bam_vrfy_subtree(dbp,
!(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Recno leaf page %lu in non-recno tree",
+ "Page %lu: recno leaf page non-recno tree",
(u_long)pgno));
goto done;
}
@@ -1372,7 +1460,7 @@ __bam_vrfy_subtree(dbp,
*/
isbad = 1;
EPRINT((dbp->dbenv,
- "Non-recno leaf page %lu in recno tree",
+ "Page %lu: non-recno leaf page in recno tree",
(u_long)pgno));
goto done;
}
@@ -1389,7 +1477,7 @@ __bam_vrfy_subtree(dbp,
if (!LF_ISSET(ST_DUPOK)) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Duplicates on page %lu in non-dup btree",
+ "Page %lu: duplicates in non-dup btree",
(u_long)pgno));
} else {
/*
@@ -1414,8 +1502,8 @@ __bam_vrfy_subtree(dbp,
}
if ((ret = __bam_vrfy_subtree(
dbp, vdp, child->pgno, NULL,
- NULL, stflags, NULL, NULL,
- NULL)) != 0) {
+ NULL, stflags | ST_TOPLEVEL,
+ NULL, NULL, NULL)) != 0) {
if (ret !=
DB_VERIFY_BAD)
goto err;
@@ -1436,14 +1524,13 @@ __bam_vrfy_subtree(dbp,
if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
LF_ISSET(ST_DUPSORT)) {
EPRINT((dbp->dbenv,
- "Unsorted duplicate set at page %lu in sorted-dup database",
+ "Page %lu: unsorted duplicate set in sorted-dup database",
(u_long)pgno));
isbad = 1;
}
}
}
goto leaf;
- break;
case P_IBTREE:
case P_IRECNO:
/* We handle these below. */
@@ -1455,10 +1542,18 @@ __bam_vrfy_subtree(dbp,
* Note that the code at the "done" label assumes that the
* current page is a btree/recno one of some sort; this
* is not the case here, so we goto err.
+ *
+ * If the page is entirely zeroed, its pip->type will be a lie
+ * (we assumed it was a hash page, as they're allowed to be
+ * zeroed); handle this case specially.
*/
- EPRINT((dbp->dbenv,
- "Page %lu is of inappropriate type %lu",
- (u_long)pgno, (u_long)pip->type));
+ if (F_ISSET(pip, VRFY_IS_ALLZEROES))
+ ZEROPG_ERR_PRINT(dbp->dbenv,
+ pgno, "btree or recno page");
+ else
+ EPRINT((dbp->dbenv,
+ "Page %lu: btree or recno page is of inappropriate type %lu",
+ (u_long)pgno, (u_long)pip->type));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -1500,8 +1595,9 @@ __bam_vrfy_subtree(dbp,
relen != child_relen) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Recno page %lu returned bad re_len",
- (u_long)child->pgno));
+ "Page %lu: recno page returned bad re_len %lu",
+ (u_long)child->pgno,
+ (u_long)child_relen));
}
if (relenp)
*relenp = relen;
@@ -1510,10 +1606,8 @@ __bam_vrfy_subtree(dbp,
nrecs += child_nrecs;
if (level != child_level + 1) {
isbad = 1;
- EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
- "Recno level incorrect on page ",
- (u_long)child->pgno, ": got ",
- (u_long)child_level, ", expected ",
+ EPRINT((dbp->dbenv, "Page %lu: recno level incorrect: got %lu, expected %lu",
+ (u_long)child->pgno, (u_long)child_level,
(u_long)(level - 1)));
}
} else if (child->type == V_OVERFLOW &&
@@ -1543,12 +1637,12 @@ __bam_vrfy_subtree(dbp,
* itself, which must sort lower than all entries on its child;
* ri will be the key to its right, which must sort greater.
*/
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
for (i = 0; i < pip->entries; i += O_INDX) {
- li = GET_BINTERNAL(h, i);
+ li = GET_BINTERNAL(dbp, h, i);
ri = (i + O_INDX < pip->entries) ?
- GET_BINTERNAL(h, i + O_INDX) : NULL;
+ GET_BINTERNAL(dbp, h, i + O_INDX) : NULL;
/*
* The leftmost key is forcibly sorted less than all entries,
@@ -1578,18 +1672,18 @@ __bam_vrfy_subtree(dbp,
if (li->nrecs != child_nrecs) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Item %lu page %lu has incorrect record count of %lu, should be %lu",
- (u_long)i, (u_long)pgno, (u_long)li->nrecs,
+ "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
+ (u_long)pgno, (u_long)i, (u_long)li->nrecs,
(u_long)child_nrecs));
}
}
if (level != child_level + 1) {
isbad = 1;
- EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
- "Btree level incorrect on page ", (u_long)li->pgno,
- ": got ", (u_long)child_level, ", expected ",
- (u_long)(level - 1)));
+ EPRINT((dbp->dbenv,
+ "Page %lu: Btree level incorrect: got %lu, expected %lu",
+ (u_long)li->pgno,
+ (u_long)child_level, (u_long)(level - 1)));
}
}
@@ -1616,7 +1710,7 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
* isbad == 0, though, it's now safe to do so, as we've
* traversed any child overflow pages. Do it.
*/
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
if ((ret = __bam_vrfy_itemorder(dbp,
vdp, h, pgno, 0, 1, 0, flags)) != 0)
@@ -1625,12 +1719,35 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
}
/*
+ * It's possible to get to this point with a page that has no
+ * items, but without having detected any sort of failure yet.
+ * Having zero items is legal if it's a leaf--it may be the
+ * root page in an empty tree, or the tree may have been
+ * modified with the DB_REVSPLITOFF flag set (there's no way
+ * to tell from what's on disk). For an internal page,
+ * though, having no items is a problem (all internal pages
+ * must have children).
+ */
+ if (isbad == 0 && ret == 0) {
+ if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
+ goto err;
+
+ if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
+ EPRINT((dbp->dbenv,
+ "Page %lu: internal page is empty and should not be",
+ (u_long)pgno));
+ isbad = 1;
+ goto err;
+ }
+ }
+
+ /*
* Our parent has sent us BINTERNAL pointers to parent records
* so that we can verify our place with respect to them. If it's
* appropriate--we have a default sort function--verify this.
*/
if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) {
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
/*
@@ -1662,7 +1779,7 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
isbad = 1;
EPRINT((dbp->dbenv,
- "Bad record count on page %lu: got %lu, expected %lu",
+ "Page %lu: bad record count: has %lu records, claims %lu",
(u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt));
}
@@ -1676,13 +1793,32 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
goto err;
if (p != 0) {
isbad = 1;
- EPRINT((dbp->dbenv, "Page %lu linked twice", (u_long)pgno));
+ EPRINT((dbp->dbenv, "Page %lu: linked twice", (u_long)pgno));
} else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0)
goto err;
-err: if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+ if (toplevel)
+ /*
+ * The last page's next_pgno in the leaf chain should have been
+ * PGNO_INVALID.
+ */
+ if (vdp->next_pgno != PGNO_INVALID) {
+ EPRINT((dbp->dbenv, "Page %lu: unterminated leaf chain",
+ (u_long)vdp->prev_pgno));
+ isbad = 1;
+ }
+
+err: if (toplevel) {
+ /* Restore our caller's settings. */
+ vdp->next_pgno = next_pgno;
+ vdp->prev_pgno = prev_pgno;
+ vdp->leaf_type = leaf_type;
+ }
+
+ if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
- if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+ if ((t_ret =
+ __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
ret = t_ret;
@@ -1720,6 +1856,14 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
F_SET(&dbt, DB_DBT_MALLOC);
ret = 0;
+ /*
+ * Empty pages are sorted correctly by definition. We check
+ * to see whether they ought to be empty elsewhere; leaf
+ * pages legally may be.
+ */
+ if (NUM_ENT(h) == 0)
+ return (0);
+
switch (TYPE(h)) {
case P_IBTREE:
case P_LDUP:
@@ -1760,7 +1904,8 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
} else {
DB_ASSERT(0);
EPRINT((dbp->dbenv,
- "Unknown type for internal record"));
+ "Page %lu: unknown type for internal record",
+ (u_long)PGNO(h)));
return (EINVAL);
}
@@ -1768,17 +1913,17 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) {
if (cmp > 0) {
EPRINT((dbp->dbenv,
- "First item on page %lu sorted greater than parent entry",
+ "Page %lu: first item on page sorted greater than parent entry",
(u_long)PGNO(h)));
ret = DB_VERIFY_BAD;
}
} else
EPRINT((dbp->dbenv,
- "First item on page %lu had comparison error",
+ "Page %lu: first item on page had comparison error",
(u_long)PGNO(h)));
if (dbt.data != lp->data)
- __os_free(dbt.data, 0);
+ __os_ufree(dbp->dbenv, dbt.data);
if (ret != 0)
return (ret);
}
@@ -1795,7 +1940,8 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
} else {
DB_ASSERT(0);
EPRINT((dbp->dbenv,
- "Unknown type for internal record"));
+ "Page %lu: unknown type for internal record",
+ (u_long)PGNO(h)));
return (EINVAL);
}
@@ -1803,17 +1949,17 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) {
if (cmp < 0) {
EPRINT((dbp->dbenv,
- "Last item on page %lu sorted greater than parent entry",
+ "Page %lu: last item on page sorted greater than parent entry",
(u_long)PGNO(h)));
ret = DB_VERIFY_BAD;
}
} else
EPRINT((dbp->dbenv,
- "Last item on page %lu had comparison error",
+ "Page %lu: last item on page had comparison error",
(u_long)PGNO(h)));
if (dbt.data != rp->data)
- __os_free(dbt.data, 0);
+ __os_ufree(dbp->dbenv, dbt.data);
}
return (ret);
@@ -1843,7 +1989,7 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
DBT dbt, unkdbt;
BKEYDATA *bk;
BOVERFLOW *bo;
- db_indx_t i, beg, end;
+ db_indx_t i, beg, end, *inp;
u_int32_t himark;
u_int8_t *pgmap;
void *ovflbuf;
@@ -1854,24 +2000,25 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
ovflbuf = pgmap = NULL;
err_ret = ret = 0;
+ inp = P_INP(dbp, h);
memset(&dbt, 0, sizeof(DBT));
dbt.flags = DB_DBT_REALLOC;
memset(&unkdbt, 0, sizeof(DBT));
- unkdbt.size = strlen("UNKNOWN") + 1;
+ unkdbt.size = (u_int32_t)(strlen("UNKNOWN") + 1);
unkdbt.data = "UNKNOWN";
/*
* Allocate a buffer for overflow items. Start at one page;
* __db_safe_goff will realloc as needed.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &ovflbuf)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &ovflbuf)) != 0)
return (ret);
if (LF_ISSET(DB_AGGRESSIVE)) {
if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pgmap)) != 0)
+ __os_malloc(dbp->dbenv, dbp->pgsize, &pgmap)) != 0)
goto err;
memset(pgmap, 0, dbp->pgsize);
}
@@ -1914,7 +2061,7 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
* We only want to print deleted items if
* DB_AGGRESSIVE is set.
*/
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
continue;
@@ -1927,10 +2074,10 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
if (key != NULL &&
(i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY)))
if ((ret = __db_prdbt(key,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
- beg = h->inp[i];
+ beg = inp[i];
switch (B_TYPE(bk->type)) {
case B_DUPLICATE:
end = beg + BOVERFLOW_SIZE - 1;
@@ -1958,23 +2105,24 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
(i % P_INDX == 0)) {
/* Not much to do on failure. */
if ((ret = __db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL)) != 0)
+ handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
}
if ((ret = __db_salvage_duptree(dbp,
vdp, bo->pgno, &dbt, handle, callback,
- flags | SA_SKIPFIRSTKEY)) != 0)
+ flags | SA_SKIPFIRSTKEY)) != 0)
err_ret = ret;
break;
case B_KEYDATA:
- end = ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1;
+ end =
+ ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1;
dbt.data = bk->data;
dbt.size = bk->len;
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
case B_OVERFLOW:
@@ -1985,11 +2133,11 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
err_ret = ret;
/* We care about err_ret more. */
(void)__db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL);
+ handle, callback, 0, vdp);
break;
}
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
default:
@@ -2020,12 +2168,12 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
* a datum; fix this imbalance by printing an "UNKNOWN".
*/
if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret =
- __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, NULL)) != 0))
+ __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0))
err_ret = ret;
err: if (pgmap != NULL)
- __os_free(pgmap, 0);
- __os_free(ovflbuf, 0);
+ __os_free(dbp->dbenv, pgmap);
+ __os_free(dbp->dbenv, ovflbuf);
/* Mark this page as done. */
if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
@@ -2061,12 +2209,13 @@ __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
for (i = 0; i < NUM_ENT(h); i++) {
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, i);
+ bi = GET_BINTERNAL(dbp, h, i);
if ((t_ret = __db_salvage_duptree(dbp,
vdp, bi->pgno, key, handle, callback, flags)) != 0)
ret = t_ret;
+ break;
case P_IRECNO:
- ri = GET_RINTERNAL(h, i);
+ ri = GET_RINTERNAL(dbp, h, i);
if ((t_ret = __db_salvage_duptree(dbp,
vdp, ri->pgno, key, handle, callback, flags)) != 0)
ret = t_ret;
@@ -2110,11 +2259,13 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
DB *pgset;
{
BINTERNAL *bi;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_pgno_t current, p;
int err_ret, ret;
+ mpf = dbp->mpf;
h = NULL;
ret = err_ret = 0;
DB_ASSERT(pgset != NULL);
@@ -2123,7 +2274,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
err_ret = DB_VERIFY_BAD;
goto err;
}
- if ((ret = memp_fget(dbp->mpf, &current, 0, &h)) != 0) {
+ if ((ret = mpf->get(mpf, &current, 0, &h)) != 0) {
err_ret = ret;
goto err;
}
@@ -2137,10 +2288,10 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
goto err;
}
if (TYPE(h) == P_IBTREE) {
- bi = GET_BINTERNAL(h, 0);
+ bi = GET_BINTERNAL(dbp, h, 0);
current = bi->pgno;
} else { /* P_IRECNO */
- ri = GET_RINTERNAL(h, 0);
+ ri = GET_RINTERNAL(dbp, h, 0);
current = ri->pgno;
}
break;
@@ -2152,7 +2303,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
goto err;
}
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
err_ret = ret;
h = NULL;
}
@@ -2163,8 +2314,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
*/
traverse:
while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
- if (h == NULL &&
- (ret = memp_fget(dbp->mpf, &current, 0, &h) != 0)) {
+ if (h == NULL && (ret = mpf->get(mpf, &current, 0, &h)) != 0) {
err_ret = ret;
break;
}
@@ -2184,13 +2334,13 @@ traverse:
goto err;
current = NEXT_PGNO(h);
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = mpf->put(mpf, h, 0)) != 0)
err_ret = ret;
h = NULL;
}
err: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
return (ret == 0 ? err_ret : ret);
}
@@ -2218,7 +2368,7 @@ __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp)
memset(dbt, 0, sizeof(DBT));
*freedbtp = 0;
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (B_TYPE(bk->type) == B_OVERFLOW) {
if (!ovflok)
return (0);
diff --git a/bdb/btree/btree.src b/bdb/btree/btree.src
index a1eba7d7fc7..73f4abac874 100644
--- a/bdb/btree/btree.src
+++ b/bdb/btree/btree.src
@@ -1,13 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*
- * $Id: btree.src,v 10.26 2000/12/12 17:40:23 bostic Exp $
+ * $Id: btree.src,v 10.35 2002/04/17 19:02:56 krinsky Exp $
*/
-PREFIX bam
+PREFIX __bam
+DBPRIVATE
INCLUDE #include "db_config.h"
INCLUDE
@@ -15,69 +16,23 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES
INCLUDE #include <sys/types.h>
INCLUDE
INCLUDE #include <ctype.h>
-INCLUDE #include <errno.h>
INCLUDE #include <string.h>
INCLUDE #endif
INCLUDE
INCLUDE #include "db_int.h"
-INCLUDE #include "db_page.h"
-INCLUDE #include "db_dispatch.h"
-INCLUDE #include "db_am.h"
-INCLUDE #include "btree.h"
-INCLUDE #include "txn.h"
+INCLUDE #include "dbinc/crypto.h"
+INCLUDE #include "dbinc/db_page.h"
+INCLUDE #include "dbinc/db_dispatch.h"
+INCLUDE #include "dbinc/db_am.h"
+INCLUDE #include "dbinc/btree.h"
+INCLUDE #include "dbinc/log.h"
+INCLUDE #include "dbinc/rep.h"
+INCLUDE #include "dbinc/txn.h"
INCLUDE
/*
- * BTREE-pg_alloc: used to record allocating a new page.
- *
- * meta_lsn: the meta-data page's original lsn.
- * page_lsn: the allocated page's original lsn.
- * pgno: the page allocated.
- * next: the next page on the free list.
+ * NOTE: pg_alloc and pg_free have been moved to db.src, where they belong.
*/
-BEGIN pg_alloc 51
-ARG fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-END
-
-DEPRECATED pg_alloc1 60
-ARG fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-POINTER alloc_lsn DB_LSN * lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-END
-
-/*
- * BTREE-pg_free: used to record freeing a page.
- *
- * pgno: the page being freed.
- * meta_lsn: the meta-data page's original lsn.
- * header: the header from the free'd page.
- * next: the previous next pointer on the metadata page.
- */
-BEGIN pg_free 52
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-DBT header DBT s
-ARG next db_pgno_t lu
-END
-
-DEPRECATED pg_free1 61
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-POINTER alloc_lsn DB_LSN * lu
-DBT header DBT s
-ARG next db_pgno_t lu
-END
/*
* BTREE-split: used to log a page split.
@@ -89,46 +44,21 @@ END
* indx: the number of entries that went to the left page.
* npgno: the next page number
* nlsn: the next page's original LSN (or 0 if no next page).
- * pg: the split page's contents before the split.
- */
-DEPRECATED split1 53
-ARG fileid int32_t ld
-ARG left db_pgno_t lu
-POINTER llsn DB_LSN * lu
-ARG right db_pgno_t lu
-POINTER rlsn DB_LSN * lu
-ARG indx u_int32_t lu
-ARG npgno db_pgno_t lu
-POINTER nlsn DB_LSN * lu
-DBT pg DBT s
-END
-
-/*
- * BTREE-split: used to log a page split.
- *
- * left: the page number for the low-order contents.
- * llsn: the left page's original LSN.
- * right: the page number for the high-order contents.
- * rlsn: the right page's original LSN.
- * indx: the number of entries that went to the left page.
- * npgno: the next page number
- * npgno: the next page number
- * nlsn: the next page's original LSN (or 0 if no next page).
* root_pgno: the root page number
* pg: the split page's contents before the split.
* opflags: SPL_NRECS: if splitting a tree that maintains a record count.
*/
BEGIN split 62
-ARG fileid int32_t ld
-ARG left db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK left db_pgno_t lu
POINTER llsn DB_LSN * lu
-ARG right db_pgno_t lu
+WRLOCK right db_pgno_t lu
POINTER rlsn DB_LSN * lu
ARG indx u_int32_t lu
ARG npgno db_pgno_t lu
POINTER nlsn DB_LSN * lu
-ARG root_pgno db_pgno_t lu
-DBT pg DBT s
+WRLOCKNZ root_pgno db_pgno_t lu
+PGDBT pg DBT s
ARG opflags u_int32_t lu
END
@@ -137,34 +67,16 @@ END
*
* pgno: the page number of the page copied over the root.
* pgdbt: the page being copied on the root page.
- * nrec: the tree's record count.
- * rootent: last entry on the root page.
- * rootlsn: the root page's original lsn.
- */
-DEPRECATED rsplit1 54
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-DBT pgdbt DBT s
-ARG nrec db_pgno_t lu
-DBT rootent DBT s
-POINTER rootlsn DB_LSN * lu
-END
-
-/*
- * BTREE-rsplit: used to log a reverse-split
- *
- * pgno: the page number of the page copied over the root.
- * pgdbt: the page being copied on the root page.
* root_pgno: the root page number.
* nrec: the tree's record count.
* rootent: last entry on the root page.
* rootlsn: the root page's original lsn.
*/
BEGIN rsplit 63
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-DBT pgdbt DBT s
-ARG root_pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
+PGDBT pgdbt DBT s
+WRLOCK root_pgno db_pgno_t lu
ARG nrec db_pgno_t lu
DBT rootent DBT s
POINTER rootlsn DB_LSN * lu
@@ -180,8 +92,8 @@ END
* is_insert: 0 if a delete, 1 if an insert.
*/
BEGIN adj 55
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
ARG indx_copy u_int32_t lu
@@ -198,8 +110,8 @@ END
* opflags: CAD_UPDATEROOT: if root page count was adjusted.
*/
BEGIN cadjust 56
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
ARG adjust int32_t ld
@@ -214,8 +126,8 @@ END
* indx: the index to be deleted.
*/
BEGIN cdel 57
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
END
@@ -230,8 +142,8 @@ END
* duplicate: the prefix of the replacement that matches the original.
*/
BEGIN repl 58
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
ARG isdeleted u_int32_t lu
@@ -245,9 +157,9 @@ END
* BTREE-root: log the assignment of a root btree page.
*/
BEGIN root 59
-ARG fileid int32_t ld
-ARG meta_pgno db_pgno_t lu
-ARG root_pgno db_pgno_t lu
+DB fileid int32_t ld
+WRLOCK meta_pgno db_pgno_t lu
+WRLOCK root_pgno db_pgno_t lu
POINTER meta_lsn DB_LSN * lu
END
@@ -260,7 +172,7 @@ END
*/
BEGIN curadj 64
/* Fileid of db affected. */
-ARG fileid int32_t ld
+DB fileid int32_t ld
/* Which adjustment. */
ARG mode db_ca_mode ld
/* Page entry is from. */
@@ -284,7 +196,7 @@ END
*/
BEGIN rcuradj 65
/* Fileid of db affected. */
-ARG fileid int32_t ld
+DB fileid int32_t ld
/* Which adjustment. */
ARG mode ca_recno_arg ld
/* Root page number. */