diff options
author | unknown <ram@mysql.r18.ru> | 2002-10-30 15:57:05 +0400 |
---|---|---|
committer | unknown <ram@mysql.r18.ru> | 2002-10-30 15:57:05 +0400 |
commit | 155e78f014de1a2e259ae5119f4621fbb210a784 (patch) | |
tree | 6881a3cca88bea0bb9eeffd5aae34be437152786 /bdb/btree | |
parent | b8798d25ab71436bf690ee8ae48285a655c5487e (diff) | |
download | mariadb-git-155e78f014de1a2e259ae5119f4621fbb210a784.tar.gz |
BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f:
Delete: bdb/build_vxworks/ex_access/ex_access.wpj
BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6:
Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj
BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd:
Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj
BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74:
Delete: bdb/build_vxworks/ex_env/ex_env.wpj
BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d:
Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj
BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41:
Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj
BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682:
Delete: bdb/build_win32/db_buildall.dsp
BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed:
Delete: bdb/cxx/cxx_app.cpp
BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952:
Delete: bdb/cxx/cxx_log.cpp
BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556:
Delete: bdb/cxx/cxx_table.cpp
BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd:
Delete: bdb/cxx/namemap.txt
BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373:
Delete: bdb/db/Design.fileop
BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef:
Delete: bdb/db185/db185_int.h
BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5:
Delete: bdb/dist/acconfig.h
BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1:
Delete: bdb/dist/aclocal/mutex.m4
BitKeeper/deleted/.del-options.m4~b9d0ca637213750a:
Delete: bdb/dist/aclocal/options.m4
BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30:
Delete: bdb/dist/aclocal/programs.m4
BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db:
Delete: bdb/dist/aclocal/tcl.m4
BitKeeper/deleted/.del-types.m4~59cae158c9a32cff:
Delete: bdb/dist/aclocal/types.m4
BitKeeper/deleted/.del-script~d38f6d3a4f159cb4:
Delete: bdb/dist/build/script
BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c:
Delete: bdb/dist/configure.in
BitKeeper/deleted/.del-ltconfig~66bbd007d8024af:
Delete: bdb/dist/ltconfig
BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a:
Delete: bdb/dist/rec_ctemp
BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f:
Delete: bdb/dist/s_tcl
BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15:
Delete: bdb/dist/template/.IGNORE_ME
BitKeeper/deleted/.del-btree.h~179f2aefec1753d:
Delete: bdb/include/btree.h
BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8:
Delete: bdb/include/cxx_int.h
BitKeeper/deleted/.del-db.src~6b433ae615b16a8d:
Delete: bdb/include/db.src
BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c:
Delete: bdb/include/db_185.h
BitKeeper/deleted/.del-db_am.h~a714912b6b75932f:
Delete: bdb/include/db_am.h
BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9:
Delete: bdb/include/db_cxx.h
BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904:
Delete: bdb/include/db_dispatch.h
BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7:
Delete: bdb/include/db_int.src
BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a:
Delete: bdb/include/db_join.h
BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc:
Delete: bdb/include/db_page.h
BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab:
Delete: bdb/include/db_server_int.h
BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3:
Delete: bdb/include/db_shash.h
BitKeeper/deleted/.del-db_swap.h~1e60887550864a59:
Delete: bdb/include/db_swap.h
BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d:
Delete: bdb/include/db_upgrade.h
BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e:
Delete: bdb/include/db_verify.h
BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc:
Delete: bdb/include/debug.h
BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb:
Delete: bdb/include/hash.h
BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f:
Delete: bdb/include/lock.h
BitKeeper/deleted/.del-log.h~ff20184238e35e4d:
Delete: bdb/include/log.h
BitKeeper/deleted/.del-mp.h~7e317597622f3411:
Delete: bdb/include/mp.h
BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137:
Delete: bdb/include/mutex.h
BitKeeper/deleted/.del-os.h~91867cc8757cd0e3:
Delete: bdb/include/os.h
BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be:
Delete: bdb/include/os_jump.h
BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597:
Delete: bdb/include/qam.h
BitKeeper/deleted/.del-queue.h~4c72c0826c123d5:
Delete: bdb/include/queue.h
BitKeeper/deleted/.del-region.h~513fe04d977ca0fc:
Delete: bdb/include/region.h
BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36:
Delete: bdb/include/shqueue.h
BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f:
Delete: bdb/include/tcl_db.h
BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4:
Delete: bdb/include/txn.h
BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a:
Delete: bdb/include/xa.h
BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023:
Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java
BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d:
Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java
BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b:
Delete: bdb/lock/lock_conflict.c
BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2:
Delete: bdb/log/log.src
BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f:
Delete: bdb/log/log_findckp.c
BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297:
Delete: bdb/log/log_rec.c
BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15:
Delete: bdb/log/log_register.c
BitKeeper/deleted/.del-Design~b3d0f179f2767b:
Delete: bdb/mp/Design
BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c:
Delete: bdb/os/os_finit.c
BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924:
Delete: bdb/os_vxworks/os_abs.c
BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122:
Delete: bdb/os_vxworks/os_finit.c
BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3:
Delete: bdb/os_vxworks/os_map.c
BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad:
Delete: bdb/os_win32/os_finit.c
BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d:
Delete: bdb/patches/log-corruption.patch
BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm
BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod
BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm
BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P
BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c:
Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs
BitKeeper/deleted/.del-Changes~729c1891efa60de9:
Delete: bdb/perl.BerkeleyDB/Changes
BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0:
Delete: bdb/perl.BerkeleyDB/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a:
Delete: bdb/perl.BerkeleyDB/Makefile.PL
BitKeeper/deleted/.del-README~5f2f579b1a241407:
Delete: bdb/perl.BerkeleyDB/README
BitKeeper/deleted/.del-Todo~dca3c66c193adda9:
Delete: bdb/perl.BerkeleyDB/Todo
BitKeeper/deleted/.del-config.in~ae81681e450e0999:
Delete: bdb/perl.BerkeleyDB/config.in
BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e:
Delete: bdb/perl.BerkeleyDB/dbinfo
BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e:
Delete: bdb/perl.BerkeleyDB/mkconsts
BitKeeper/deleted/.del-mkpod~182c0ca54e439afb:
Delete: bdb/perl.BerkeleyDB/mkpod
BitKeeper/deleted/.del-5.004~e008cb5a48805543:
Delete: bdb/perl.BerkeleyDB/patches/5.004
BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8:
Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl
BitKeeper/deleted/.del-solaris.pl~6771e7182394e152:
Delete: bdb/perl.BerkeleyDB/hints/solaris.pl
BitKeeper/deleted/.del-typemap~783b8f5295b05f3d:
Delete: bdb/perl.BerkeleyDB/typemap
BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc:
Delete: bdb/perl.BerkeleyDB/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6:
Delete: bdb/perl.BerkeleyDB/patches/5.004_02
BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f:
Delete: bdb/perl.BerkeleyDB/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~e326cb51af09d154:
Delete: bdb/perl.BerkeleyDB/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe:
Delete: bdb/perl.BerkeleyDB/patches/5.004_05
BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b:
Delete: bdb/perl.BerkeleyDB/patches/5.005
BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e:
Delete: bdb/perl.BerkeleyDB/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb:
Delete: bdb/perl.BerkeleyDB/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a:
Delete: bdb/perl.BerkeleyDB/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22:
Delete: bdb/perl.BerkeleyDB/patches/5.6.0
BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406:
Delete: bdb/perl.BerkeleyDB/t/btree.t
BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2:
Delete: bdb/perl.BerkeleyDB/t/db-3.0.t
BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018:
Delete: bdb/perl.BerkeleyDB/t/db-3.1.t
BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf:
Delete: bdb/perl.BerkeleyDB/t/db-3.2.t
BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd:
Delete: bdb/perl.BerkeleyDB/t/destroy.t
BitKeeper/deleted/.del-env.t~a8604a4499c4bd07:
Delete: bdb/perl.BerkeleyDB/t/env.t
BitKeeper/deleted/.del-examples.t~2571b77c3cc75574:
Delete: bdb/perl.BerkeleyDB/t/examples.t
BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88:
Delete: bdb/perl.BerkeleyDB/t/examples.t.T
BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d:
Delete: bdb/perl.BerkeleyDB/t/examples3.t.T
BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83:
Delete: bdb/perl.BerkeleyDB/t/examples3.t
BitKeeper/deleted/.del-filter.t~f87b045c1b708637:
Delete: bdb/perl.BerkeleyDB/t/filter.t
BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3:
Delete: bdb/perl.BerkeleyDB/t/hash.t
BitKeeper/deleted/.del-join.t~29fc39f74a83ca22:
Delete: bdb/perl.BerkeleyDB/t/join.t
BitKeeper/deleted/.del-mldbm.t~31f5015341eea040:
Delete: bdb/perl.BerkeleyDB/t/mldbm.t
BitKeeper/deleted/.del-queue.t~8f338034ce44a641:
Delete: bdb/perl.BerkeleyDB/t/queue.t
BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e:
Delete: bdb/perl.BerkeleyDB/t/recno.t
BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d:
Delete: bdb/perl.BerkeleyDB/t/strict.t
BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603:
Delete: bdb/perl.BerkeleyDB/t/subdb.t
BitKeeper/deleted/.del-txn.t~65033b8558ae1216:
Delete: bdb/perl.BerkeleyDB/t/txn.t
BitKeeper/deleted/.del-unknown.t~f3710458682665e1:
Delete: bdb/perl.BerkeleyDB/t/unknown.t
BitKeeper/deleted/.del-Changes~436f74a5c414c65b:
Delete: bdb/perl.DB_File/Changes
BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82:
Delete: bdb/perl.DB_File/DB_File.pm
BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8:
Delete: bdb/perl.DB_File/DB_File.xs
BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069:
Delete: bdb/perl.DB_File/DB_File_BS
BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac:
Delete: bdb/perl.DB_File/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38:
Delete: bdb/perl.DB_File/Makefile.PL
BitKeeper/deleted/.del-README~77e924a5a9bae6b3:
Delete: bdb/perl.DB_File/README
BitKeeper/deleted/.del-config.in~ab4c2792b86a810b:
Delete: bdb/perl.DB_File/config.in
BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb:
Delete: bdb/perl.DB_File/dbinfo
BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9:
Delete: bdb/perl.DB_File/hints/dynixptx.pl
BitKeeper/deleted/.del-typemap~55cffb3288a9e587:
Delete: bdb/perl.DB_File/typemap
BitKeeper/deleted/.del-version.c~a4df0e646f8b3975:
Delete: bdb/perl.DB_File/version.c
BitKeeper/deleted/.del-5.004_01~d6830d0082702af7:
Delete: bdb/perl.DB_File/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~78b082dc80c91031:
Delete: bdb/perl.DB_File/patches/5.004_02
BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b:
Delete: bdb/perl.DB_File/patches/5.004
BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe:
Delete: bdb/perl.DB_File/hints/sco.pl
BitKeeper/deleted/.del-5.004_03~33f274648b160d95:
Delete: bdb/perl.DB_File/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a:
Delete: bdb/perl.DB_File/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142:
Delete: bdb/perl.DB_File/patches/5.004_05
BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951:
Delete: bdb/perl.DB_File/patches/5.005
BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa:
Delete: bdb/perl.DB_File/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522:
Delete: bdb/perl.DB_File/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea:
Delete: bdb/perl.DB_File/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85:
Delete: bdb/perl.DB_File/patches/5.6.0
BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3:
Delete: bdb/perl.DB_File/t/db-btree.t
BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2:
Delete: bdb/perl.DB_File/t/db-hash.t
BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50:
Delete: bdb/perl.DB_File/t/db-recno.t
BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2:
Delete: bdb/rpc_server/db_server.sed
BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4:
Delete: bdb/rpc_server/db_server_proc.c
BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607:
Delete: bdb/rpc_server/db_server_svc.sed
BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda:
Delete: bdb/rpc_server/db_server_util.c
BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7:
Delete: bdb/test/log.tcl
BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c:
Delete: bdb/test/mpool.tcl
BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565:
Delete: bdb/test/mutex.tcl
BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e:
Delete: bdb/test/txn.tcl
BitKeeper/deleted/.del-README~e800a12a5392010a:
Delete: bdb/test/upgrade/README
BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98:
Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl
BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b:
Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
Diffstat (limited to 'bdb/btree')
-rw-r--r-- | bdb/btree/bt_compare.c | 14 | ||||
-rw-r--r-- | bdb/btree/bt_conv.c | 30 | ||||
-rw-r--r-- | bdb/btree/bt_curadj.c | 55 | ||||
-rw-r--r-- | bdb/btree/bt_cursor.c | 1193 | ||||
-rw-r--r-- | bdb/btree/bt_delete.c | 186 | ||||
-rw-r--r-- | bdb/btree/bt_method.c | 33 | ||||
-rw-r--r-- | bdb/btree/bt_open.c | 425 | ||||
-rw-r--r-- | bdb/btree/bt_put.c | 165 | ||||
-rw-r--r-- | bdb/btree/bt_rec.c | 494 | ||||
-rw-r--r-- | bdb/btree/bt_reclaim.c | 45 | ||||
-rw-r--r-- | bdb/btree/bt_recno.c | 430 | ||||
-rw-r--r-- | bdb/btree/bt_rsearch.c | 85 | ||||
-rw-r--r-- | bdb/btree/bt_search.c | 92 | ||||
-rw-r--r-- | bdb/btree/bt_split.c | 323 | ||||
-rw-r--r-- | bdb/btree/bt_stat.c | 203 | ||||
-rw-r--r-- | bdb/btree/bt_upgrade.c | 24 | ||||
-rw-r--r-- | bdb/btree/bt_verify.c | 526 | ||||
-rw-r--r-- | bdb/btree/btree.src | 158 |
18 files changed, 2546 insertions, 1935 deletions
diff --git a/bdb/btree/bt_compare.c b/bdb/btree/bt_compare.c index 91481c31366..cbe2a1a7170 100644 --- a/bdb/btree/bt_compare.c +++ b/bdb/btree/bt_compare.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krinsky Exp $"; +static const char revid[] = "$Id: bt_compare.c,v 11.17 2002/03/27 04:30:42 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -51,8 +51,8 @@ static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krins #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" /* * __bam_cmp -- @@ -92,7 +92,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp) case P_LBTREE: case P_LDUP: case P_LRECNO: - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); if (B_TYPE(bk->type) == B_OVERFLOW) bo = (BOVERFLOW *)bk; else { @@ -125,7 +125,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp) return (0); } - bi = GET_BINTERNAL(h, indx); + bi = GET_BINTERNAL(dbp, h, indx); if (B_TYPE(bi->type) == B_OVERFLOW) bo = (BOVERFLOW *)(bi->data); else { @@ -136,7 +136,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp) } break; default: - return (__db_pgfmt(dbp, PGNO(h))); + return (__db_pgfmt(dbp->dbenv, PGNO(h))); } /* diff --git a/bdb/btree/bt_conv.c b/bdb/btree/bt_conv.c index fd30f375f7c..4264b62ffdd 100644 --- a/bdb/btree/bt_conv.c +++ b/bdb/btree/bt_conv.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp $"; +static const char revid[] = "$Id: bt_conv.c,v 11.13 2002/08/06 06:11:12 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,20 +16,21 @@ static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" /* * __bam_pgin -- * Convert host-specific page layout from the host-independent format * stored on disk. * - * PUBLIC: int __bam_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); + * PUBLIC: int __bam_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); */ int -__bam_pgin(dbenv, pg, pp, cookie) +__bam_pgin(dbenv, dummydbp, pg, pp, cookie) DB_ENV *dbenv; + DB *dummydbp; db_pgno_t pg; void *pp; DBT *cookie; @@ -38,12 +39,12 @@ __bam_pgin(dbenv, pg, pp, cookie) PAGE *h; pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) + if (!F_ISSET(pginfo, DB_AM_SWAP)) return (0); h = pp; return (TYPE(h) == P_BTREEMETA ? __bam_mswap(pp) : - __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1)); + __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1)); } /* @@ -51,11 +52,12 @@ __bam_pgin(dbenv, pg, pp, cookie) * Convert host-specific page layout to the host-independent format * stored on disk. * - * PUBLIC: int __bam_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); + * PUBLIC: int __bam_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); */ int -__bam_pgout(dbenv, pg, pp, cookie) +__bam_pgout(dbenv, dummydbp, pg, pp, cookie) DB_ENV *dbenv; + DB *dummydbp; db_pgno_t pg; void *pp; DBT *cookie; @@ -64,12 +66,12 @@ __bam_pgout(dbenv, pg, pp, cookie) PAGE *h; pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) + if (!F_ISSET(pginfo, DB_AM_SWAP)) return (0); h = pp; return (TYPE(h) == P_BTREEMETA ? 
__bam_mswap(pp) : - __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0)); + __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0)); } /* @@ -93,6 +95,8 @@ __bam_mswap(pg) SWAP32(p); /* re_len */ SWAP32(p); /* re_pad */ SWAP32(p); /* root */ + p += 92 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ return (0); } diff --git a/bdb/btree/bt_curadj.c b/bdb/btree/bt_curadj.c index 011acd2f4a1..50d3d422e49 100644 --- a/bdb/btree/bt_curadj.c +++ b/bdb/btree/bt_curadj.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic Exp $"; +static const char revid[] = "$Id: bt_curadj.c,v 11.30 2002/07/03 19:03:48 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,9 +16,8 @@ static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t)); @@ -203,10 +202,9 @@ __bam_ca_di(my_dbc, pgno, indx, adjust) } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - if (found != 0 && DB_LOGGING(my_dbc)) { - if ((ret = __bam_curadj_log(dbenv, - my_dbc->txn, &lsn, 0, dbp->log_fileid, - DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0) + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, my_dbc->txn, + &lsn, 0, DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0) return (ret); } @@ -234,8 +232,13 @@ __bam_opd_cursor(dbp, dbc, first, tpgno, ti) * Allocate a new cursor and create the stack. If duplicates * are sorted, we've just created an off-page duplicate Btree. * If duplicates aren't sorted, we've just created a Recno tree. 
+ * + * Note that in order to get here at all, there shouldn't be + * an old off-page dup cursor--to augment the checking db_c_newopd + * will do, assert this. */ - if ((ret = __db_c_newopd(dbc, tpgno, &dbc_nopd)) != 0) + DB_ASSERT(orig_cp->opd == NULL); + if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0) return (ret); cp = (BTREE_CURSOR *)dbc_nopd->internal; @@ -321,17 +324,16 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); return (ret); if (my_txn != NULL && dbc->txn != my_txn) found = 1; - /* We released the MUTEX to get a cursor, start over. */ + /* We released the mutex to get a cursor, start over. */ goto loop; } MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - if (found != 0 && DB_LOGGING(my_dbc)) { - if ((ret = __bam_curadj_log(dbenv, - my_dbc->txn, &lsn, 0, dbp->log_fileid, - DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0) + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, my_dbc->txn, + &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0) return (ret); } return (0); @@ -372,8 +374,16 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { orig_cp = (BTREE_CURSOR *)dbc->internal; + /* + * A note on the orig_cp->opd != NULL requirement here: + * it's possible that there's a cursor that refers to + * the same duplicate set, but which has no opd cursor, + * because it refers to a different item and we took + * care of it while processing a previous record. + */ if (orig_cp->pgno != fpgno || orig_cp->indx != first || + orig_cp->opd == NULL || ((BTREE_CURSOR *)orig_cp->opd->internal)->indx != ti) continue; @@ -383,7 +393,7 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); orig_cp->opd = NULL; orig_cp->indx = fi; /* - * We released the MUTEX to free a cursor, + * We released the mutex to free a cursor, * start over. 
*/ goto loop; @@ -440,10 +450,9 @@ __bam_ca_rsplit(my_dbc, fpgno, tpgno) } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - if (found != 0 && DB_LOGGING(my_dbc)) { - if ((ret = __bam_curadj_log(dbenv, - my_dbc->txn, &lsn, 0, dbp->log_fileid, - DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0) + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, my_dbc->txn, + &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0) return (ret); } return (0); @@ -512,9 +521,9 @@ __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft) } MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - if (found != 0 && DB_LOGGING(my_dbc)) { - if ((ret = __bam_curadj_log(dbenv, my_dbc->txn, - &lsn, 0, dbp->log_fileid, DB_CA_SPLIT, ppgno, rpgno, + if (found != 0 && DBC_LOGGING(my_dbc)) { + if ((ret = __bam_curadj_log(dbp, + my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno, cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0) return (ret); } diff --git a/bdb/btree/bt_cursor.c b/bdb/btree/bt_cursor.c index 84ab7c80744..14d90e8873d 100644 --- a/bdb/btree/bt_cursor.c +++ b/bdb/btree/bt_cursor.c @@ -1,31 +1,29 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_cursor.c,v 11.88 2001/01/11 18:19:49 bostic Exp $"; +static const char revid[] = "$Id: bt_cursor.c,v 11.147 2002/08/13 20:46:07 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <stdlib.h> #include <string.h> #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "lock.h" -#include "qam.h" -#include "common_ext.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +static int __bam_bulk __P((DBC *, DBT *, u_int32_t)); static int __bam_c_close __P((DBC *, db_pgno_t, int *)); static int __bam_c_del __P((DBC *)); static int __bam_c_destroy __P((DBC *)); @@ -33,15 +31,16 @@ static int __bam_c_first __P((DBC *)); static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __bam_c_getstack __P((DBC *)); static int __bam_c_last __P((DBC *)); -static int __bam_c_next __P((DBC *, int)); +static int __bam_c_next __P((DBC *, int, int)); static int __bam_c_physdel __P((DBC *)); static int __bam_c_prev __P((DBC *)); static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static void __bam_c_reset __P((BTREE_CURSOR *)); -static int __bam_c_search __P((DBC *, const DBT *, u_int32_t, int *)); +static int __bam_c_search __P((DBC *, + db_pgno_t, const DBT *, u_int32_t, int *)); static int __bam_c_writelock __P((DBC *)); -static int __bam_getboth_finddatum __P((DBC *, DBT *)); +static int __bam_getboth_finddatum __P((DBC *, DBT *, u_int32_t)); static int __bam_getbothc __P((DBC *, DBT *)); +static int __bam_get_prev __P((DBC *)); static int __bam_isopd __P((DBC *, db_pgno_t *)); /* @@ -53,48 +52,60 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); * don't -- we don't duplicate locks when we duplicate cursors if we are * running in a transaction environment as there's no point if locks are * never discarded. 
This means that the cursor may or may not hold a lock. + * In the case where we are decending the tree we always want to + * unlock the held interior page so we use ACQUIRE_COUPLE. */ #undef ACQUIRE -#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) {\ +#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ if ((pagep) != NULL) { \ - ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \ + ret = __mpf->put(__mpf, pagep, 0); \ + pagep = NULL; \ + } else \ + ret = 0; \ + if ((ret) == 0 && STD_LOCKING(dbc)) \ + ret = __db_lget(dbc, LCK_COUPLE, lpgno, mode, 0, &(lock));\ + if ((ret) == 0) \ + ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \ +} + +#undef ACQUIRE_COUPLE +#define ACQUIRE_COUPLE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ + if ((pagep) != NULL) { \ + ret = __mpf->put(__mpf, pagep, 0); \ pagep = NULL; \ } else \ ret = 0; \ if ((ret) == 0 && STD_LOCKING(dbc)) \ ret = __db_lget(dbc, \ - (lock).off == LOCK_INVALID ? 0 : LCK_COUPLE, \ - lpgno, mode, 0, &lock); \ - else \ - (lock).off = LOCK_INVALID; \ + LCK_COUPLE_ALWAYS, lpgno, mode, 0, &(lock)); \ if ((ret) == 0) \ - ret = memp_fget((dbc)->dbp->mpf, &(fpgno), 0, &(pagep));\ + ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \ } /* Acquire a new page/lock for a cursor. */ #undef ACQUIRE_CUR -#define ACQUIRE_CUR(dbc, mode, ret) { \ +#define ACQUIRE_CUR(dbc, mode, p, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ - ACQUIRE(dbc, mode, \ - __cp->pgno, __cp->lock, __cp->pgno, __cp->page, ret); \ - if ((ret) == 0) \ + ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \ + if ((ret) == 0) { \ + __cp->pgno = p; \ __cp->lock_mode = (mode); \ + } \ } /* - * Acquire a new page/lock for a cursor, and move the cursor on success. 
- * The reason that this is a separate macro is because we don't want to - * set the pgno/indx fields in the cursor until we actually have the lock, - * otherwise the cursor adjust routines will adjust the cursor even though - * we're not really on the page. + * Acquire a new page/lock for a cursor and release the previous. + * This is typically used when decending a tree and we do not + * want to hold the interior nodes locked. */ -#undef ACQUIRE_CUR_SET -#define ACQUIRE_CUR_SET(dbc, mode, p, ret) { \ +#undef ACQUIRE_CUR_COUPLE +#define ACQUIRE_CUR_COUPLE(dbc, mode, p, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ - ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \ + ACQUIRE_COUPLE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \ if ((ret) == 0) { \ - __cp->pgno = p; \ - __cp->indx = 0; \ + __cp->pgno = p; \ __cp->lock_mode = (mode); \ } \ } @@ -112,7 +123,7 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); if (STD_LOCKING(dbc) && \ __cp->lock_mode != DB_LOCK_WRITE && \ ((ret) = __db_lget(dbc, \ - __cp->lock.off == LOCK_INVALID ? 0 : LCK_COUPLE, \ + LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0, \ __cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) == 0) \ __cp->lock_mode = DB_LOCK_WRITE; \ } @@ -120,19 +131,19 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); /* Discard the current page/lock. */ #undef DISCARD #define DISCARD(dbc, ldiscard, lock, pagep, ret) { \ + DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ int __t_ret; \ if ((pagep) != NULL) { \ - ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \ + ret = __mpf->put(__mpf, pagep, 0); \ pagep = NULL; \ } else \ ret = 0; \ - if ((lock).off != LOCK_INVALID) { \ - __t_ret = ldiscard ? 
\ - __LPUT((dbc), lock): __TLPUT((dbc), lock); \ - if (__t_ret != 0 && (ret) == 0) \ - ret = __t_ret; \ - (lock).off = LOCK_INVALID; \ - } \ + if (ldiscard) \ + __t_ret = __LPUT((dbc), lock); \ + else \ + __t_ret = __TLPUT((dbc), lock); \ + if (__t_ret != 0 && (ret) == 0) \ + ret = __t_ret; \ } /* Discard the current page/lock for a cursor. */ @@ -146,12 +157,12 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); /* If on-page item is a deleted record. */ #undef IS_DELETED -#define IS_DELETED(page, indx) \ - B_DISSET(GET_BKEYDATA(page, \ +#define IS_DELETED(dbp, page, indx) \ + B_DISSET(GET_BKEYDATA(dbp, page, \ (indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type) #undef IS_CUR_DELETED #define IS_CUR_DELETED(dbc) \ - IS_DELETED((dbc)->internal->page, (dbc)->internal->indx) + IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx) /* * Test to see if two cursors could point to duplicates of the same key. @@ -163,8 +174,8 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); */ #undef IS_DUPLICATE #define IS_DUPLICATE(dbc, i1, i2) \ - (((PAGE *)(dbc)->internal->page)->inp[i1] == \ - ((PAGE *)(dbc)->internal->page)->inp[i2]) + (P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] == \ + P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2]) #undef IS_CUR_DUPLICATE #define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx) \ (F_ISSET(dbc, DBC_OPD) || \ @@ -172,22 +183,6 @@ static int __bam_isopd __P((DBC *, db_pgno_t *)); IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx))) /* - * __bam_c_reset -- - * Initialize internal cursor structure. 
- */ -static void -__bam_c_reset(cp) - BTREE_CURSOR *cp; -{ - cp->csp = cp->sp; - cp->lock.off = LOCK_INVALID; - cp->lock_mode = DB_LOCK_NG; - cp->recno = RECNO_OOB; - cp->order = INVALID_ORDER; - cp->flags = 0; -} - -/* * __bam_c_init -- * Initialize the access private portion of a cursor * @@ -198,35 +193,26 @@ __bam_c_init(dbc, dbtype) DBC *dbc; DBTYPE dbtype; { - BTREE *t; - BTREE_CURSOR *cp; - DB *dbp; + DB_ENV *dbenv; int ret; - u_int32_t minkey; - dbp = dbc->dbp; + dbenv = dbc->dbp->dbenv; /* Allocate/initialize the internal structure. */ - if (dbc->internal == NULL) { - if ((ret = __os_malloc(dbp->dbenv, - sizeof(BTREE_CURSOR), NULL, &cp)) != 0) - return (ret); - dbc->internal = (DBC_INTERNAL *)cp; - - cp->sp = cp->csp = cp->stack; - cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); - } else - cp = (BTREE_CURSOR *)dbc->internal; - __bam_c_reset(cp); + if (dbc->internal == NULL && (ret = + __os_malloc(dbenv, sizeof(BTREE_CURSOR), &dbc->internal)) != 0) + return (ret); /* Initialize methods. */ dbc->c_close = __db_c_close; dbc->c_count = __db_c_count; dbc->c_del = __db_c_del; dbc->c_dup = __db_c_dup; - dbc->c_get = __db_c_get; + dbc->c_get = dbc->c_real_get = __db_c_get; + dbc->c_pget = __db_c_pget; dbc->c_put = __db_c_put; if (dbtype == DB_BTREE) { + dbc->c_am_bulk = __bam_bulk; dbc->c_am_close = __bam_c_close; dbc->c_am_del = __bam_c_del; dbc->c_am_destroy = __bam_c_destroy; @@ -234,6 +220,7 @@ __bam_c_init(dbc, dbtype) dbc->c_am_put = __bam_c_put; dbc->c_am_writelock = __bam_c_writelock; } else { + dbc->c_am_bulk = __bam_bulk; dbc->c_am_close = __bam_c_close; dbc->c_am_del = __ram_c_del; dbc->c_am_destroy = __bam_c_destroy; @@ -242,18 +229,6 @@ __bam_c_init(dbc, dbtype) dbc->c_am_writelock = __bam_c_writelock; } - /* - * The btree leaf page data structures require that two key/data pairs - * (or four items) fit on a page, but other than that there's no fixed - * requirement. 
The btree off-page duplicates only require two items, - * to be exact, but requiring four for them as well seems reasonable. - * - * Recno uses the btree bt_ovflsize value -- it's close enough. - */ - t = dbp->bt_internal; - minkey = F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey; - cp->ovflsize = B_MINKEY_TO_OVFLSIZE(minkey, dbp->pgsize); - return (0); } @@ -267,12 +242,13 @@ int __bam_c_refresh(dbc) DBC *dbc; { + BTREE *t; BTREE_CURSOR *cp; DB *dbp; dbp = dbc->dbp; + t = dbp->bt_internal; cp = (BTREE_CURSOR *)dbc->internal; - __bam_c_reset(cp); /* * If our caller set the root page number, it's because the root was @@ -280,11 +256,32 @@ __bam_c_refresh(dbc) * pull it out of our internal information. */ if (cp->root == PGNO_INVALID) - cp->root = ((BTREE *)dbp->bt_internal)->bt_root; + cp->root = t->bt_root; + + LOCK_INIT(cp->lock); + cp->lock_mode = DB_LOCK_NG; + + cp->sp = cp->csp = cp->stack; + cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); + + /* + * The btree leaf page data structures require that two key/data pairs + * (or four items) fit on a page, but other than that there's no fixed + * requirement. The btree off-page duplicates only require two items, + * to be exact, but requiring four for them as well seems reasonable. + * + * Recno uses the btree bt_ovflsize value -- it's close enough. + */ + cp->ovflsize = B_MINKEY_TO_OVFLSIZE( + dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize); + + cp->recno = RECNO_OOB; + cp->order = INVALID_ORDER; + cp->flags = 0; /* Initialize for record numbers. */ if (F_ISSET(dbc, DBC_OPD) || - dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) { + dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) { F_SET(cp, C_RECNUM); /* @@ -293,7 +290,7 @@ __bam_c_refresh(dbc) * mutable record numbers. 
*/ if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) || - F_ISSET(dbp, DB_BT_RECNUM | DB_RE_RENUMBER)) + F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER)) F_SET(cp, C_RENUMBER); } @@ -313,11 +310,12 @@ __bam_c_close(dbc, root_pgno, rmroot) BTREE_CURSOR *cp, *cp_opd, *cp_c; DB *dbp; DBC *dbc_opd, *dbc_c; + DB_MPOOLFILE *mpf; PAGE *h; - u_int32_t num; int cdb_lock, ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; cp_opd = (dbc_opd = cp->opd) == NULL ? NULL : (BTREE_CURSOR *)dbc_opd->internal; @@ -408,10 +406,10 @@ __bam_c_close(dbc, root_pgno, rmroot) * We will not have been provided a root page number. Acquire * one from the primary database. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0) goto err; - root_pgno = GET_BOVERFLOW(h, cp->indx + O_INDX)->pgno; - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno; + if ((ret = mpf->put(mpf, h, 0)) != 0) goto err; dbc_c = dbc_opd; @@ -453,18 +451,14 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal; * info in __db_c_get--the OPD is also a WRITEDUP. */ if (CDB_LOCKING(dbp->dbenv)) { - DB_ASSERT(!F_ISSET(dbc, DBC_OPD) || F_ISSET(dbc, DBC_WRITEDUP)); - if (!F_ISSET(dbc, DBC_WRITER)) { - if ((ret = - lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE, + if (F_ISSET(dbc, DBC_WRITEDUP | DBC_WRITECURSOR)) { + if ((ret = dbp->dbenv->lock_get( + dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0) goto err; cdb_lock = 1; } - - cp_c->lock.off = LOCK_INVALID; - if ((ret = - memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) + if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) goto err; goto delete; @@ -480,9 +474,7 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal; * is responsible for acquiring any necessary locks before calling us. 
*/ if (F_ISSET(dbc, DBC_OPD)) { - cp_c->lock.off = LOCK_INVALID; - if ((ret = - memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) + if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) goto err; goto delete; } @@ -542,13 +534,13 @@ delete: /* * in that case. So, if the off-page duplicate tree is empty at this * point, we want to remove it. */ - if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &root_pgno, 0, &h)) != 0) goto err; - if ((num = NUM_ENT(h)) == 0) { + if (NUM_ENT(h) == 0) { if ((ret = __db_free(dbc, h)) != 0) goto err; } else { - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) goto err; goto done; } @@ -566,8 +558,7 @@ delete: /* * the primary page. */ if (dbc_opd != NULL) { - cp->lock.off = LOCK_INVALID; - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) goto err; if ((ret = __bam_c_physdel(dbc)) != 0) goto err; @@ -604,7 +595,7 @@ __bam_c_destroy(dbc) DBC *dbc; { /* Discard the structures. */ - __os_free(dbc->internal, sizeof(BTREE_CURSOR)); + __os_free(dbc->dbp->dbenv, dbc->internal); return (0); } @@ -622,11 +613,13 @@ __bam_c_count(dbc, recnop) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; db_indx_t indx, top; db_recno_t recno; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -637,7 +630,7 @@ __bam_c_count(dbc, recnop) * new locks, we have to have a read lock to even get here. 
*/ if (cp->opd == NULL) { - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) return (ret); /* @@ -654,14 +647,14 @@ __bam_c_count(dbc, recnop) break; *recnop = recno; } else { - if ((ret = memp_fget(dbp->mpf, - &cp->opd->internal->root, 0, &cp->page)) != 0) + if ((ret = + mpf->get(mpf, &cp->opd->internal->root, 0, &cp->page)) != 0) return (ret); *recnop = RE_NREC(cp->page); } - ret = memp_fput(dbp->mpf, cp->page, 0); + ret = mpf->put(mpf, cp->page, 0); cp->page = NULL; return (ret); @@ -677,9 +670,11 @@ __bam_c_del(dbc) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; int ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; ret = 0; @@ -706,25 +701,27 @@ __bam_c_del(dbc) goto err; cp->page = cp->csp->page; } else { - ACQUIRE_CUR(dbc, DB_LOCK_WRITE, ret); + ACQUIRE_CUR(dbc, DB_LOCK_WRITE, cp->pgno, ret); if (ret != 0) goto err; } /* Log the change. */ - if (DB_LOGGING(dbc) && - (ret = __bam_cdel_log(dbp->dbenv, dbc->txn, &LSN(cp->page), 0, - dbp->log_fileid, PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0) - goto err; + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cdel_log(dbp, dbc->txn, &LSN(cp->page), 0, + PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(cp->page)); /* Set the intent-to-delete flag on the page. */ if (TYPE(cp->page) == P_LBTREE) - B_DSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type); + B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx + O_INDX)->type); else - B_DSET(GET_BKEYDATA(cp->page, cp->indx)->type); + B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type); /* Mark the page dirty. 
*/ - ret = memp_fset(dbp->mpf, cp->page, DB_MPOOL_DIRTY); + ret = mpf->set(mpf, cp->page, DB_MPOOL_DIRTY); err: /* * If we've been successful so far and the tree has record numbers, @@ -736,7 +733,7 @@ err: /* (void)__bam_stkrel(dbc, 0); } else if (cp->page != NULL && - (t_ret = memp_fput(dbp->mpf, cp->page, 0)) != 0 && ret == 0) + (t_ret = mpf->put(mpf, cp->page, 0)) != 0 && ret == 0) ret = t_ret; cp->page = NULL; @@ -771,7 +768,7 @@ __bam_c_dup(orig_dbc, new_dbc) * holding inside a transaction because all the locks are retained * until the transaction commits or aborts. */ - if (orig->lock.off != LOCK_INVALID && orig_dbc->txn == NULL) { + if (LOCK_ISSET(orig->lock) && orig_dbc->txn == NULL) { if ((ret = __db_lget(new_dbc, 0, new->pgno, new->lock_mode, 0, &new->lock)) != 0) return (ret); @@ -796,11 +793,13 @@ __bam_c_get(dbc, key, data, flags, pgnop) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; db_pgno_t orig_pgno; db_indx_t orig_indx; int exact, newopd, ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; orig_pgno = cp->pgno; orig_indx = cp->indx; @@ -820,7 +819,7 @@ __bam_c_get(dbc, key, data, flags, pgnop) * write lock, but upgrading to a write lock has no better * chance of succeeding now instead of later, so don't try. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) goto err; break; case DB_FIRST: @@ -829,9 +828,10 @@ __bam_c_get(dbc, key, data, flags, pgnop) goto err; break; case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: /* * There are two ways to get here based on DBcursor->c_get - * with the DB_GET_BOTH flag set: + * with the DB_GET_BOTH/DB_GET_BOTH_RANGE flags set: * * 1. Searching a sorted off-page duplicate tree: do a tree * search. @@ -839,20 +839,34 @@ __bam_c_get(dbc, key, data, flags, pgnop) * 2. Searching btree: do a tree search. If it returns a * reference to off-page duplicate tree, return immediately * and let our caller deal with it. 
If the search doesn't - * return a reference to off-page duplicate tree, start an - * on-page search. + * return a reference to off-page duplicate tree, continue + * with an on-page search. */ if (F_ISSET(dbc, DBC_OPD)) { if ((ret = __bam_c_search( - dbc, data, DB_GET_BOTH, &exact)) != 0) - goto err; - if (!exact) { - ret = DB_NOTFOUND; + dbc, PGNO_INVALID, data, flags, &exact)) != 0) goto err; + if (flags == DB_GET_BOTH) { + if (!exact) { + ret = DB_NOTFOUND; + goto err; + } + break; } + + /* + * We didn't require an exact match, so the search may + * may have returned an entry past the end of the page, + * or we may be referencing a deleted record. If so, + * move to the next entry. + */ + if ((cp->indx == NUM_ENT(cp->page) || + IS_CUR_DELETED(dbc)) && + (ret = __bam_c_next(dbc, 1, 0)) != 0) + goto err; } else { if ((ret = __bam_c_search( - dbc, key, DB_GET_BOTH, &exact)) != 0) + dbc, PGNO_INVALID, key, flags, &exact)) != 0) return (ret); if (!exact) { ret = DB_NOTFOUND; @@ -863,7 +877,8 @@ __bam_c_get(dbc, key, data, flags, pgnop) newopd = 1; break; } - if ((ret = __bam_getboth_finddatum(dbc, data)) != 0) + if ((ret = + __bam_getboth_finddatum(dbc, data, flags)) != 0) goto err; } break; @@ -882,11 +897,11 @@ __bam_c_get(dbc, key, data, flags, pgnop) if ((ret = __bam_c_first(dbc)) != 0) goto err; } else - if ((ret = __bam_c_next(dbc, 1)) != 0) + if ((ret = __bam_c_next(dbc, 1, 0)) != 0) goto err; break; case DB_NEXT_DUP: - if ((ret = __bam_c_next(dbc, 1)) != 0) + if ((ret = __bam_c_next(dbc, 1, 0)) != 0) goto err; if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) { ret = DB_NOTFOUND; @@ -900,7 +915,7 @@ __bam_c_get(dbc, key, data, flags, pgnop) goto err; } else do { - if ((ret = __bam_c_next(dbc, 1)) != 0) + if ((ret = __bam_c_next(dbc, 1, 0)) != 0) goto err; } while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)); break; @@ -927,12 +942,14 @@ __bam_c_get(dbc, key, data, flags, pgnop) case DB_SET: case DB_SET_RECNO: newopd = 1; - if ((ret = __bam_c_search(dbc, key, 
flags, &exact)) != 0) + if ((ret = __bam_c_search(dbc, + PGNO_INVALID, key, flags, &exact)) != 0) goto err; break; case DB_SET_RANGE: newopd = 1; - if ((ret = __bam_c_search(dbc, key, flags, &exact)) != 0) + if ((ret = __bam_c_search(dbc, + PGNO_INVALID, key, flags, &exact)) != 0) goto err; /* @@ -942,7 +959,7 @@ __bam_c_get(dbc, key, data, flags, pgnop) * the next entry. */ if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc)) - if ((ret = __bam_c_next(dbc, 0)) != 0) + if ((ret = __bam_c_next(dbc, 0, 0)) != 0) goto err; break; default: @@ -957,8 +974,15 @@ __bam_c_get(dbc, key, data, flags, pgnop) if (newopd && pgnop != NULL) (void)__bam_isopd(dbc, pgnop); - /* Don't return the key, it was passed to us */ - if (flags == DB_SET) + /* + * Don't return the key, it was passed to us (this is true even if the + * application defines a compare function returning equality for more + * than one key value, since in that case which actual value we store + * in the database is undefined -- and particularly true in the case of + * duplicates where we only store one key value). + */ + if (flags == DB_GET_BOTH || + flags == DB_GET_BOTH_RANGE || flags == DB_SET) F_SET(key, DB_DBT_ISSET); err: /* @@ -966,13 +990,596 @@ err: /* * moved, clear the delete flag, DBcursor->c_get never references * a deleted key, if it moved at all. 
*/ - if (F_ISSET(cp, C_DELETED) - && (cp->pgno != orig_pgno || cp->indx != orig_indx)) + if (F_ISSET(cp, C_DELETED) && + (cp->pgno != orig_pgno || cp->indx != orig_indx)) F_CLR(cp, C_DELETED); return (ret); } +static int +__bam_get_prev(dbc) + DBC *dbc; +{ + BTREE_CURSOR *cp; + DBT key, data; + db_pgno_t pgno; + int ret; + + if ((ret = __bam_c_prev(dbc)) != 0) + return (ret); + + if (__bam_isopd(dbc, &pgno)) { + cp = (BTREE_CURSOR *)dbc->internal; + if ((ret = __db_c_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0) + return (ret); + if ((ret = cp->opd->c_am_get(cp->opd, + &key, &data, DB_LAST, NULL)) != 0) + return (ret); + } + + return (0); +} + +/* + * __bam_bulk -- Return bulk data from a btree. + */ +static int +__bam_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE_CURSOR *cp; + PAGE *pg; + db_indx_t *inp, indx, pg_keyoff; + int32_t *endp, key_off, *offp, *saveoffp; + u_int8_t *dbuf, *dp, *np; + u_int32_t key_size, size, space; + int adj, is_key, need_pg, next_key, no_dup; + int pagesize, rec_key, ret; + + ret = 0; + key_off = 0; + size = 0; + pagesize = dbc->dbp->pgsize; + cp = (BTREE_CURSOR *)dbc->internal; + + /* + * dp tracks the beginging of the page in the buffer. + * np is the next place to copy things into the buffer. + * dbuf always stays at the beging of the buffer. + */ + dbuf = data->data; + np = dp = dbuf; + + /* Keep track of space that is left. There is a termination entry */ + space = data->ulen; + space -= sizeof(*offp); + + /* Build the offset/size table from the end up. */ + endp = (int32_t *)((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; + + key_size = 0; + + /* + * Distinguish between BTREE and RECNO. + * There are no keys in RECNO. If MULTIPLE_KEY is specified + * then we return the record numbers. + * is_key indicates that multiple btree keys are returned. + * rec_key is set if we are returning record numbers. 
+ * next_key is set if we are going after the next key rather than dup. + */ + if (dbc->dbtype == DB_BTREE) { + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1: 0; + rec_key = 0; + next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + adj = 2; + } else { + is_key = 0; + rec_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + next_key = LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; + adj = 1; + } + no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; + +next_pg: + indx = cp->indx; + pg = cp->page; + + inp = P_INP(dbc->dbp, pg); + /* The current page is not yet in the buffer. */ + need_pg = 1; + + /* + * Keep track of the offset of the current key on the page. + * If we are returning keys, set it to 0 first so we force + * the copy of the key to the buffer. + */ + pg_keyoff = 0; + if (is_key == 0) + pg_keyoff = inp[indx]; + + do { + if (IS_DELETED(dbc->dbp, pg, indx)) { + if (dbc->dbtype != DB_RECNO) + continue; + + cp->recno++; + /* + * If we are not returning recnos then we + * need to fill in every slot so the user + * can calculate the record numbers. + */ + if (rec_key != 0) + continue; + + space -= 2 * sizeof(*offp); + /* Check if space as underflowed. */ + if (space > data->ulen) + goto back_up; + + /* Just mark the empty recno slots. */ + *offp-- = 0; + *offp-- = 0; + continue; + } + + /* + * Check to see if we have a new key. + * If so, then see if we need to put the + * key on the page. If its already there + * then we just point to it. + */ + if (is_key && pg_keyoff != inp[indx]) { + bk = GET_BKEYDATA(dbc->dbp, pg, indx); + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = key_size = bo->tlen; + if (key_size > space) + goto get_key_space; + if ((ret = __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= key_size; + key_off = (int32_t)(np - dbuf); + np += key_size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +get_key_space: + /* Nothing added, then error. 
*/ + if (offp == endp) { + data->size = + ALIGN(size + + pagesize, + sizeof(u_int32_t)); + return (ENOMEM); + } + /* + * We need to back up to the + * last record put into the + * buffer so that it is + * CURRENT. + */ + if (indx != 0) + indx -= P_INDX; + else { + if ((ret = + __bam_get_prev( + dbc)) != 0) + return (ret); + indx = cp->indx; + pg = cp->page; + } + break; + } + /* + * Move the data part of the page + * to the buffer. + */ + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + key_size = bk->len; + key_off = (int32_t)(inp[indx] - HOFFSET(pg) + + dp - dbuf + SSZA(BKEYDATA, data)); + pg_keyoff = inp[indx]; + } + } + + /* + * Reserve space for the pointers and sizes. + * Either key/data pair or just for a data item. + */ + space -= (is_key ? 4 : 2) * sizeof(*offp); + if (rec_key) + space -= sizeof(*offp); + + /* Check to see if space has underflowed. */ + if (space > data->ulen) + goto back_up; + + /* + * Determine if the next record is in the + * buffer already or if it needs to be copied in. + * If we have an off page dup, then copy as many + * as will fit into the buffer. + */ + bk = GET_BKEYDATA(dbc->dbp, pg, indx + adj - 1); + if (B_TYPE(bk->type) == B_DUPLICATE) { + bo = (BOVERFLOW *)bk; + if (is_key) { + *offp-- = key_off; + *offp-- = key_size; + } + /* + * We pass the offset of the current key. + * On return we check to see if offp has + * moved to see if any data fit. + */ + saveoffp = offp; + if ((ret = __bam_bulk_duplicates(dbc, bo->pgno, + dbuf, is_key ? offp + P_INDX : NULL, + &offp, &np, &space, no_dup)) != 0) { + if (ret == ENOMEM) { + size = space; + /* If nothing was added, then error. 
*/ + if (offp == saveoffp) { + offp += 2; + goto back_up; + } + goto get_space; + } + return (ret); + } + } else if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = bo->tlen; + if (size > space) + goto back_up; + if ((ret = + __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= size; + if (is_key) { + *offp-- = key_off; + *offp-- = key_size; + } else if (rec_key) + *offp-- = cp->recno; + *offp-- = (int32_t)(np - dbuf); + np += size; + *offp-- = size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { +back_up: + /* + * Back up the index so that the + * last record in the buffer is CURRENT + */ + if (indx >= adj) + indx -= adj; + else { + if ((ret = + __bam_get_prev(dbc)) != 0 && + ret != DB_NOTFOUND) + return (ret); + indx = cp->indx; + pg = cp->page; + } + if (dbc->dbtype == DB_RECNO) + cp->recno--; +get_space: + /* + * See if we put anything in the + * buffer or if we are doing a DBP->get + * did we get all of the data. + */ + if (offp >= + (is_key ? &endp[-1] : endp) || + F_ISSET(dbc, DBC_TRANSIENT)) { + data->size = ALIGN(size + + data->ulen - space, + sizeof(u_int32_t)); + return (ENOMEM); + } + break; + } + memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + /* + * Add the offsets and sizes to the end of the buffer. + * First add the key info then the data info. + */ + if (is_key) { + *offp-- = key_off; + *offp-- = key_size; + } else if (rec_key) + *offp-- = cp->recno; + *offp-- = (int32_t)(inp[indx + adj - 1] - HOFFSET(pg) + + dp - dbuf + SSZA(BKEYDATA, data)); + *offp-- = bk->len; + } + if (dbc->dbtype == DB_RECNO) + cp->recno++; + else if (no_dup) { + while (indx + adj < NUM_ENT(pg) && + pg_keyoff == inp[indx + adj]) + indx += adj; + } + /* + * Stop when we either run off the page or we + * move to the next key and we are not returning mulitple keys. 
+ */ + } while ((indx += adj) < NUM_ENT(pg) && + (next_key || pg_keyoff == inp[indx])); + + /* If we are off the page then try to the next page. */ + if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { + cp->indx = indx; + ret = __bam_c_next(dbc, 0, 1); + if (ret == 0) + goto next_pg; + if (ret != DB_NOTFOUND) + return (ret); + } + + /* + * If we did a DBP->get we must error if we did not return + * all the data for the current key because there is + * no way to know if we did not get it all, nor any + * interface to fetch the balance. + */ + + if (ret == 0 && + F_ISSET(dbc, DBC_TRANSIENT) && pg_keyoff == inp[indx]) { + data->size = (data->ulen - space) + size; + return (ENOMEM); + } + /* + * Must leave the index pointing at the last record fetched. + * If we are not fetching keys, we may have stepped to the + * next key. + */ + if (next_key || pg_keyoff == inp[indx]) + cp->indx = indx; + else + cp->indx = indx - P_INDX; + + if (rec_key == 1) + *offp = (u_int32_t) RECNO_OOB; + else + *offp = (u_int32_t) -1; + return (0); +} + +/* + * __bam_bulk_overflow -- + * Dump overflow record into the buffer. + * The space requirements have already been checked. + * PUBLIC: int __bam_bulk_overflow + * PUBLIC: __P((DBC *, u_int32_t, db_pgno_t, u_int8_t *)); + */ +int +__bam_bulk_overflow(dbc, len, pgno, dp) + DBC *dbc; + u_int32_t len; + db_pgno_t pgno; + u_int8_t *dp; +{ + DBT dbt; + + memset(&dbt, 0, sizeof(dbt)); + F_SET(&dbt, DB_DBT_USERMEM); + dbt.ulen = len; + dbt.data = (void *)dp; + return (__db_goff(dbc->dbp, &dbt, len, pgno, NULL, NULL)); +} + +/* + * __bam_bulk_duplicates -- + * Put as many off page duplicates as will fit into the buffer. + * This routine will adjust the cursor to reflect the position in + * the overflow tree. 
+ * PUBLIC: int __bam_bulk_duplicates __P((DBC *, + * PUBLIC: db_pgno_t, u_int8_t *, int32_t *, + * PUBLIC: int32_t **, u_int8_t **, u_int32_t *, int)); + */ +int +__bam_bulk_duplicates(dbc, pgno, dbuf, keyoff, offpp, dpp, spacep, no_dup) + DBC *dbc; + db_pgno_t pgno; + u_int8_t *dbuf; + int32_t *keyoff, **offpp; + u_int8_t **dpp; + u_int32_t *spacep; + int no_dup; +{ + DB *dbp; + BKEYDATA *bk; + BOVERFLOW *bo; + BTREE_CURSOR *cp; + DBC *opd; + DBT key, data; + PAGE *pg; + db_indx_t indx, *inp; + int32_t *offp; + u_int32_t size, space; + u_int8_t *dp, *np; + int first, need_pg, pagesize, ret, t_ret; + + ret = 0; + + dbp = dbc->dbp; + cp = (BTREE_CURSOR *)dbc->internal; + opd = cp->opd; + + if (opd == NULL) { + if ((ret = __db_c_newopd(dbc, pgno, NULL, &opd)) != 0) + return (ret); + cp->opd = opd; + if ((ret = opd->c_am_get(opd, + &key, &data, DB_FIRST, NULL)) != 0) + return (ret); + } + + pagesize = opd->dbp->pgsize; + cp = (BTREE_CURSOR *)opd->internal; + space = *spacep; + /* Get current offset slot. */ + offp = *offpp; + + /* + * np is the next place to put data. + * dp is the begining of the current page in the buffer. + */ + np = dp = *dpp; + first = 1; + indx = cp->indx; + + do { + /* Fetch the current record. No initial move. */ + if ((ret = __bam_c_next(opd, 0, 0)) != 0) + break; + pg = cp->page; + indx = cp->indx; + inp = P_INP(dbp, pg); + /* We need to copy the page to the buffer. */ + need_pg = 1; + + do { + if (IS_DELETED(dbp, pg, indx)) + goto contin; + bk = GET_BKEYDATA(dbp, pg, indx); + space -= 2 * sizeof(*offp); + /* Allocate space for key if needed. */ + if (first == 0 && keyoff != NULL) + space -= 2 * sizeof(*offp); + + /* Did space underflow? 
*/ + if (space > *spacep) { + ret = ENOMEM; + if (first == 1) { + space = *spacep + -(int32_t)space; + if (need_pg) + space += pagesize - HOFFSET(pg); + } + break; + } + if (B_TYPE(bk->type) == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + size = bo->tlen; + if (size > space) { + ret = ENOMEM; + if (first == 1) { + space = *spacep + size; + } + break; + } + if (first == 0 && keyoff != NULL) { + *offp-- = keyoff[0]; + *offp-- = keyoff[-1]; + } + if ((ret = __bam_bulk_overflow(dbc, + bo->tlen, bo->pgno, np)) != 0) + return (ret); + space -= size; + *offp-- = (int32_t)(np - dbuf); + np += size; + } else { + if (need_pg) { + dp = np; + size = pagesize - HOFFSET(pg); + if (space < size) { + ret = ENOMEM; + /* Return space required. */ + if (first == 1) { + space = *spacep + size; + } + break; + } + memcpy(dp, + (u_int8_t *)pg + HOFFSET(pg), size); + need_pg = 0; + space -= size; + np += size; + } + if (first == 0 && keyoff != NULL) { + *offp-- = keyoff[0]; + *offp-- = keyoff[-1]; + } + size = bk->len; + *offp-- = (int32_t)(inp[indx] - HOFFSET(pg) + + dp - dbuf + SSZA(BKEYDATA, data)); + } + *offp-- = size; + first = 0; + if (no_dup) + break; +contin: + indx++; + if (opd->dbtype == DB_RECNO) + cp->recno++; + } while (indx < NUM_ENT(pg)); + if (no_dup) + break; + cp->indx = indx; + + } while (ret == 0); + + /* Return the updated information. */ + *spacep = space; + *offpp = offp; + *dpp = np; + + /* + * If we ran out of space back up the pointer. + * If we did not return any dups or reached the end, close the opd. 
+ */ + if (ret == ENOMEM) { + if (opd->dbtype == DB_RECNO) { + if (--cp->recno == 0) + goto close_opd; + } else if (indx != 0) + cp->indx--; + else { + t_ret = __bam_c_prev(opd); + if (t_ret == DB_NOTFOUND) + goto close_opd; + if (t_ret != 0) + ret = t_ret; + } + } else if (keyoff == NULL && ret == DB_NOTFOUND) { + cp->indx--; + if (opd->dbtype == DB_RECNO) + --cp->recno; + } else if (indx == 0 || ret == DB_NOTFOUND) { +close_opd: + opd->c_close(opd); + ((BTREE_CURSOR *)dbc->internal)->opd = NULL; + } + if (ret == DB_NOTFOUND) + ret = 0; + + return (ret); +} + /* * __bam_getbothc -- * Search for a matching data item on a join. @@ -984,9 +1591,11 @@ __bam_getbothc(dbc, data) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; int cmp, exact, ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -995,7 +1604,7 @@ __bam_getbothc(dbc, data) * write lock, but upgrading to a write lock has no better * chance of succeeding now instead of later, so don't try. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) return (ret); /* @@ -1017,11 +1626,12 @@ __bam_getbothc(dbc, data) return (DB_NOTFOUND); /* Discard the current page, we're going to do a full search. */ - if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) + if ((ret = mpf->put(mpf, cp->page, 0)) != 0) return (ret); cp->page = NULL; - return (__bam_c_search(dbc, data, DB_GET_BOTH, &exact)); + return (__bam_c_search(dbc, + PGNO_INVALID, data, DB_GET_BOTH, &exact)); } /* @@ -1038,7 +1648,7 @@ __bam_getbothc(dbc, data) return (DB_NOTFOUND); cp->indx += P_INDX; - return (__bam_getboth_finddatum(dbc, data)); + return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH)); } /* @@ -1046,9 +1656,10 @@ __bam_getbothc(dbc, data) * Find a matching on-page data item. 
*/ static int -__bam_getboth_finddatum(dbc, data) +__bam_getboth_finddatum(dbc, data, flags) DBC *dbc; DBT *data; + u_int32_t flags; { BTREE_CURSOR *cp; DB *dbp; @@ -1060,17 +1671,14 @@ __bam_getboth_finddatum(dbc, data) /* * Called (sometimes indirectly) from DBC->get to search on-page data - * item(s) for a matching value. If the original flag was DB_GET_BOTH, - * the cursor argument is set to the first data item for the key. If - * the original flag was DB_GET_BOTHC, the cursor argument is set to - * the first data item that we can potentially return. In both cases, - * there may or may not be additional duplicate data items to search. + * item(s) for a matching value. If the original flag was DB_GET_BOTH + * or DB_GET_BOTH_RANGE, the cursor is set to the first undeleted data + * item for the key. If the original flag was DB_GET_BOTHC, the cursor + * argument is set to the first data item we can potentially return. + * In both cases, there may or may not be additional duplicate data + * items to search. * * If the duplicates are not sorted, do a linear search. - * - * If the duplicates are sorted, do a binary search. The reason for - * this is that large pages and small key/data pairs result in large - * numbers of on-page duplicates before they get pushed off-page. */ if (dbp->dup_compare == NULL) { for (;; cp->indx += P_INDX) { @@ -1085,41 +1693,62 @@ __bam_getboth_finddatum(dbc, data) !IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX)) break; } - } else { - /* - * Find the top and bottom of the duplicate set. Binary search - * requires at least two items, don't loop if there's only one. - */ - for (base = top = cp->indx; - top < NUM_ENT(cp->page); top += P_INDX) - if (!IS_DUPLICATE(dbc, cp->indx, top)) - break; - if (base == (top - P_INDX)) { - if ((ret = __bam_cmp(dbp, data, - cp->page, cp->indx + O_INDX, - dbp->dup_compare, &cmp)) != 0) - return (ret); - return (cmp == 0 ? 
0 : DB_NOTFOUND); - } + return (DB_NOTFOUND); + } - for (lim = - (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) { - cp->indx = base + ((lim >> 1) * P_INDX); - if ((ret = __bam_cmp(dbp, data, cp->page, - cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) - return (ret); - if (cmp == 0) { - if (!IS_CUR_DELETED(dbc)) - return (0); - break; - } - if (cmp > 0) { - base = cp->indx + P_INDX; - --lim; - } + /* + * If the duplicates are sorted, do a binary search. The reason for + * this is that large pages and small key/data pairs result in large + * numbers of on-page duplicates before they get pushed off-page. + * + * Find the top and bottom of the duplicate set. Binary search + * requires at least two items, don't loop if there's only one. + */ + for (base = top = cp->indx; top < NUM_ENT(cp->page); top += P_INDX) + if (!IS_DUPLICATE(dbc, cp->indx, top)) + break; + if (base == (top - P_INDX)) { + if ((ret = __bam_cmp(dbp, data, + cp->page, cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + return (ret); + return (cmp == 0 || + (cmp < 0 && flags == DB_GET_BOTH_RANGE) ? 0 : DB_NOTFOUND); + } + + for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) { + cp->indx = base + ((lim >> 1) * P_INDX); + if ((ret = __bam_cmp(dbp, data, cp->page, + cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + return (ret); + if (cmp == 0) { + /* + * XXX + * No duplicate duplicates in sorted duplicate sets, + * so there can be only one. + */ + if (!IS_CUR_DELETED(dbc)) + return (0); + break; + } + if (cmp > 0) { + base = cp->indx + P_INDX; + --lim; } } - return (DB_NOTFOUND); + + /* No match found; if we're looking for an exact match, we're done. */ + if (flags == DB_GET_BOTH) + return (DB_NOTFOUND); + + /* + * Base is the smallest index greater than the data item, may be zero + * or a last + O_INDX index, and may be deleted. Find an undeleted + * item. + */ + cp->indx = base; + while (cp->indx < top && IS_CUR_DELETED(dbc)) + cp->indx += P_INDX; + return (cp->indx < top ? 
0 : DB_NOTFOUND); } /* @@ -1136,19 +1765,22 @@ __bam_c_put(dbc, key, data, flags, pgnop) BTREE_CURSOR *cp; DB *dbp; DBT dbt; + DB_MPOOLFILE *mpf; + db_pgno_t root_pgno; u_int32_t iiop; - int cmp, exact, needkey, ret, stack; + int cmp, exact, ret, stack; void *arg; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; + root_pgno = cp->root; -split: needkey = ret = stack = 0; +split: ret = stack = 0; switch (flags) { case DB_AFTER: case DB_BEFORE: case DB_CURRENT: - needkey = 1; iiop = flags; /* @@ -1182,7 +1814,7 @@ split: needkey = ret = stack = 0; ACQUIRE_WRITE_LOCK(dbc, ret); if (ret != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) goto err; break; case DB_KEYFIRST: @@ -1192,15 +1824,22 @@ split: needkey = ret = stack = 0; * Searching off-page, sorted duplicate tree: do a tree search * for the correct item; __bam_c_search returns the smallest * slot greater than the key, use it. + * + * See comment below regarding where we can start the search. */ if (F_ISSET(dbc, DBC_OPD)) { - if ((ret = - __bam_c_search(dbc, data, flags, &exact)) != 0) + if ((ret = __bam_c_search(dbc, + F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, + data, flags, &exact)) != 0) goto err; stack = 1; /* Disallow "sorted" duplicate duplicates. */ if (exact) { + if (IS_DELETED(dbp, cp->page, cp->indx)) { + iiop = DB_CURRENT; + break; + } ret = __db_duperr(dbp, flags); goto err; } @@ -1208,8 +1847,17 @@ split: needkey = ret = stack = 0; break; } - /* Searching a btree. */ - if ((ret = __bam_c_search(dbc, key, + /* + * Searching a btree. + * + * If we've done a split, we can start the search from the + * parent of the split page, which __bam_split returned + * for us in root_pgno, unless we're in a Btree with record + * numbering. In that case, we'll need the true root page + * in order to adjust the record count. + */ + if ((ret = __bam_c_search(dbc, + F_ISSET(cp, C_RECNUM) ? 
cp->root : root_pgno, key, flags == DB_KEYFIRST || dbp->dup_compare != NULL ? DB_KEYFIRST : DB_KEYLAST, &exact)) != 0) goto err; @@ -1264,8 +1912,8 @@ split: needkey = ret = stack = 0; */ for (;; cp->indx += P_INDX) { if ((ret = __bam_cmp(dbp, data, cp->page, - cp->indx + O_INDX, dbp->dup_compare, &cmp)) !=0) - return (ret); + cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0) + goto err; if (cmp < 0) { iiop = DB_BEFORE; break; @@ -1273,7 +1921,7 @@ split: needkey = ret = stack = 0; /* Disallow "sorted" duplicate duplicates. */ if (cmp == 0) { - if (IS_DELETED(cp->page, cp->indx)) { + if (IS_DELETED(dbp, cp->page, cp->indx)) { iiop = DB_CURRENT; break; } @@ -1282,8 +1930,8 @@ split: needkey = ret = stack = 0; } if (cp->indx + P_INDX >= NUM_ENT(cp->page) || - ((PAGE *)cp->page)->inp[cp->indx] != - ((PAGE *)cp->page)->inp[cp->indx + P_INDX]) { + P_INP(dbp, ((PAGE *)cp->page))[cp->indx] != + P_INP(dbp, ((PAGE *)cp->page))[cp->indx + P_INDX]) { iiop = DB_AFTER; break; } @@ -1306,7 +1954,7 @@ split: needkey = ret = stack = 0; flags == DB_BEFORE || flags == DB_CURRENT) { memset(&dbt, 0, sizeof(DBT)); if ((ret = __db_ret(dbp, cp->page, 0, &dbt, - &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + &dbc->rkey->data, &dbc->rkey->ulen)) != 0) goto err; arg = &dbt; } else @@ -1327,7 +1975,7 @@ split: needkey = ret = stack = 0; goto err; /* Split the tree. */ - if ((ret = __bam_split(dbc, arg)) != 0) + if ((ret = __bam_split(dbc, arg, &root_pgno)) != 0) return (ret); goto split; @@ -1361,22 +2009,22 @@ done: /* * __bam_c_rget -- * Return the record number for a cursor. 
* - * PUBLIC: int __bam_c_rget __P((DBC *, DBT *, u_int32_t)); + * PUBLIC: int __bam_c_rget __P((DBC *, DBT *)); */ int -__bam_c_rget(dbc, data, flags) +__bam_c_rget(dbc, data) DBC *dbc; DBT *data; - u_int32_t flags; { BTREE_CURSOR *cp; DB *dbp; DBT dbt; + DB_MPOOLFILE *mpf; db_recno_t recno; int exact, ret; - COMPQUIET(flags, 0); dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -1384,24 +2032,24 @@ __bam_c_rget(dbc, data, flags) * Get a copy of the key. * Release the page, making sure we don't release it twice. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &cp->page)) != 0) return (ret); memset(&dbt, 0, sizeof(DBT)); if ((ret = __db_ret(dbp, cp->page, - cp->indx, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + cp->indx, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0) goto err; - ret = memp_fput(dbp->mpf, cp->page, 0); + ret = mpf->put(mpf, cp->page, 0); cp->page = NULL; if (ret != 0) return (ret); - if ((ret = __bam_search(dbc, &dbt, + if ((ret = __bam_search(dbc, PGNO_INVALID, &dbt, F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &recno, &exact)) != 0) goto err; - ret = __db_retcopy(dbp, data, - &recno, sizeof(recno), &dbc->rdata.data, &dbc->rdata.ulen); + ret = __db_retcopy(dbp->dbenv, data, + &recno, sizeof(recno), &dbc->rdata->data, &dbc->rdata->ulen); /* Release the stack. */ err: __bam_stkrel(dbc, 0); @@ -1444,17 +2092,15 @@ __bam_c_first(dbc) DBC *dbc; { BTREE_CURSOR *cp; - DB *dbp; db_pgno_t pgno; int ret; - dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; ret = 0; /* Walk down the left-hand side of the tree. 
*/ for (pgno = cp->root;;) { - ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret); + ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret); if (ret != 0) return (ret); @@ -1462,7 +2108,7 @@ __bam_c_first(dbc) if (ISLEAF(cp->page)) break; - pgno = GET_BINTERNAL(cp->page, 0)->pgno; + pgno = GET_BINTERNAL(dbc->dbp, cp->page, 0)->pgno; } /* If we want a write lock instead of a read lock, get it now. */ @@ -1472,9 +2118,11 @@ __bam_c_first(dbc) return (ret); } + cp->indx = 0; + /* If on an empty page or a deleted record, move to the next one. */ if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc)) - if ((ret = __bam_c_next(dbc, 0)) != 0) + if ((ret = __bam_c_next(dbc, 0, 0)) != 0) return (ret); return (0); @@ -1489,17 +2137,15 @@ __bam_c_last(dbc) DBC *dbc; { BTREE_CURSOR *cp; - DB *dbp; db_pgno_t pgno; int ret; - dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; ret = 0; /* Walk down the right-hand side of the tree. */ for (pgno = cp->root;;) { - ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret); + ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret); if (ret != 0) return (ret); @@ -1507,8 +2153,8 @@ __bam_c_last(dbc) if (ISLEAF(cp->page)) break; - pgno = - GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno; + pgno = GET_BINTERNAL(dbc->dbp, cp->page, + NUM_ENT(cp->page) - O_INDX)->pgno; } /* If we want a write lock instead of a read lock, get it now. */ @@ -1535,18 +2181,16 @@ __bam_c_last(dbc) * Move to the next record. */ static int -__bam_c_next(dbc, initial_move) +__bam_c_next(dbc, initial_move, deleted_okay) DBC *dbc; - int initial_move; + int initial_move, deleted_okay; { BTREE_CURSOR *cp; - DB *dbp; db_indx_t adjust; db_lockmode_t lock_mode; db_pgno_t pgno; int ret; - dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; ret = 0; @@ -1566,7 +2210,7 @@ __bam_c_next(dbc, initial_move) F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE : DB_LOCK_READ; } if (cp->page == NULL) { - ACQUIRE_CUR(dbc, lock_mode, ret); + ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret); if (ret != 0) return (ret); } @@ -1587,12 +2231,13 @@ __bam_c_next(dbc, initial_move) = NEXT_PGNO(cp->page)) == PGNO_INVALID) return (DB_NOTFOUND); - ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret); + ACQUIRE_CUR(dbc, lock_mode, pgno, ret); if (ret != 0) return (ret); + cp->indx = 0; continue; } - if (IS_CUR_DELETED(dbc)) { + if (!deleted_okay && IS_CUR_DELETED(dbc)) { cp->indx += adjust; continue; } @@ -1610,13 +2255,11 @@ __bam_c_prev(dbc) DBC *dbc; { BTREE_CURSOR *cp; - DB *dbp; db_indx_t adjust; db_lockmode_t lock_mode; db_pgno_t pgno; int ret; - dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; ret = 0; @@ -1636,7 +2279,7 @@ __bam_c_prev(dbc) F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ; } if (cp->page == NULL) { - ACQUIRE_CUR(dbc, lock_mode, ret); + ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret); if (ret != 0) return (ret); } @@ -1648,7 +2291,7 @@ __bam_c_prev(dbc) PREV_PGNO(cp->page)) == PGNO_INVALID) return (DB_NOTFOUND); - ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret); + ACQUIRE_CUR(dbc, lock_mode, pgno, ret); if (ret != 0) return (ret); @@ -1671,8 +2314,9 @@ __bam_c_prev(dbc) * Move to a specified record. */ static int -__bam_c_search(dbc, key, flags, exactp) +__bam_c_search(dbc, root_pgno, key, flags, exactp) DBC *dbc; + db_pgno_t root_pgno; const DBT *key; u_int32_t flags; int *exactp; @@ -1681,7 +2325,7 @@ __bam_c_search(dbc, key, flags, exactp) BTREE_CURSOR *cp; DB *dbp; PAGE *h; - db_indx_t indx; + db_indx_t indx, *inp; db_pgno_t bt_lpgno; db_recno_t recno; u_int32_t sflags; @@ -1712,6 +2356,9 @@ __bam_c_search(dbc, key, flags, exactp) case DB_GET_BOTH: sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND) | S_EXACT; goto search; + case DB_GET_BOTH_RANGE: + sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND); + goto search; case DB_SET_RANGE: sflags = (F_ISSET(dbc, DBC_RMW) ? 
S_WRITE : S_READ) | S_DUPFIRST; @@ -1758,6 +2405,7 @@ fast_search: /* if (ret != 0) goto fast_miss; + inp = P_INP(dbp, h); /* * It's okay if the page type isn't right or it's empty, it * just means that the world changed. @@ -1796,7 +2444,7 @@ fast_search: /* if (flags == DB_KEYLAST) goto fast_hit; for (; - indx > 0 && h->inp[indx - P_INDX] == h->inp[indx]; + indx > 0 && inp[indx - P_INDX] == inp[indx]; indx -= P_INDX) ; goto fast_hit; @@ -1823,7 +2471,7 @@ try_begin: if (h->prev_pgno == PGNO_INVALID) { goto fast_hit; for (; indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && - h->inp[indx] == h->inp[indx + P_INDX]; + inp[indx] == inp[indx + P_INDX]; indx += P_INDX) ; goto fast_hit; @@ -1852,8 +2500,8 @@ fast_miss: /* if (ret != 0) return (ret); -search: if ((ret = - __bam_search(dbc, key, sflags, 1, NULL, exactp)) != 0) +search: if ((ret = __bam_search(dbc, root_pgno, + key, sflags, 1, NULL, exactp)) != 0) return (ret); break; default: @@ -1870,12 +2518,15 @@ search: if ((ret = /* * If we inserted a key into the first or last slot of the tree, * remember where it was so we can do it more quickly next time. + * If there are duplicates and we are inserting into the last slot, + * the cursor will point _to_ the last item, not after it, which + * is why we subtract P_INDX below. */ if (TYPE(cp->page) == P_LBTREE && (flags == DB_KEYFIRST || flags == DB_KEYLAST)) t->bt_lpgno = (NEXT_PGNO(cp->page) == PGNO_INVALID && - cp->indx >= NUM_ENT(cp->page)) || + cp->indx >= NUM_ENT(cp->page) - P_INDX) || (PREV_PGNO(cp->page) == PGNO_INVALID && cp->indx == 0) ? cp->pgno : PGNO_INVALID; return (0); @@ -1893,11 +2544,13 @@ __bam_c_physdel(dbc) DB *dbp; DBT key; DB_LOCK lock; + DB_MPOOLFILE *mpf; PAGE *h; db_pgno_t pgno; int delete_page, empty_page, exact, level, ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; delete_page = empty_page = ret = 0; @@ -1911,7 +2564,7 @@ __bam_c_physdel(dbc) * space will never be reused unless the exact same key is specified. 
*/ if (delete_page && - !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_BT_REVSPLIT)) + !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_AM_REVSPLITOFF)) delete_page = 0; /* @@ -1926,11 +2579,17 @@ __bam_c_physdel(dbc) * To delete a leaf page other than an empty root page, we need a * copy of a key from the page. Use the 0th page index since it's * the last key the page held. + * + * !!! + * Note that because __bam_c_physdel is always called from a cursor + * close, it should be safe to use the cursor's own "my_rkey" memory + * to temporarily hold this key. We shouldn't own any returned-data + * memory of interest--if we do, we're in trouble anyway. */ if (delete_page) { memset(&key, 0, sizeof(DBT)); if ((ret = __db_ret(dbp, cp->page, - 0, &key, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + 0, &key, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) return (ret); } @@ -1940,7 +2599,7 @@ __bam_c_physdel(dbc) * !!! * The following operations to delete a page may deadlock. The easy * scenario is if we're deleting an item because we're closing cursors - * because we've already deadlocked and want to call txn_abort(). If + * because we've already deadlocked and want to call txn->abort. If * we fail due to deadlock, we'll leave a locked, possibly empty page * in the tree, which won't be empty long because we'll undo the delete * when we undo the transaction's modifications. @@ -1977,8 +2636,8 @@ __bam_c_physdel(dbc) */ for (level = LEAFLEVEL;; ++level) { /* Acquire a page and its parent, locked. 
*/ - if ((ret = __bam_search( - dbc, &key, S_WRPAIR, level, NULL, &exact)) != 0) + if ((ret = __bam_search(dbc, PGNO_INVALID, + &key, S_WRPAIR, level, NULL, &exact)) != 0) return (ret); /* @@ -2031,19 +2690,19 @@ __bam_c_physdel(dbc) */ switch (TYPE(h)) { case P_IBTREE: - pgno = GET_BINTERNAL(h, 0)->pgno; + pgno = GET_BINTERNAL(dbp, h, 0)->pgno; break; case P_IRECNO: - pgno = GET_RINTERNAL(h, 0)->pgno; + pgno = GET_RINTERNAL(dbp, h, 0)->pgno; break; default: - return (__db_pgfmt(dbp, PGNO(h))); + return (__db_pgfmt(dbp->dbenv, PGNO(h))); } if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &lock)) != 0) break; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) break; BT_STK_PUSH(dbp->dbenv, cp, h, 0, lock, DB_LOCK_WRITE, ret); if (ret != 0) @@ -2076,10 +2735,12 @@ __bam_c_getstack(dbc) BTREE_CURSOR *cp; DB *dbp; DBT dbt; + DB_MPOOLFILE *mpf; PAGE *h; int exact, ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -2087,21 +2748,22 @@ __bam_c_getstack(dbc) * routine has to already hold a read lock on the page, so there * is no additional lock to acquire. */ - if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0) return (ret); /* Get a copy of a key from the page. */ memset(&dbt, 0, sizeof(DBT)); if ((ret = __db_ret(dbp, - h, 0, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) + h, 0, &dbt, &dbc->rkey->data, &dbc->rkey->ulen)) != 0) goto err; /* Get a write-locked stack for the page. */ exact = 0; - ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact); + ret = __bam_search(dbc, PGNO_INVALID, + &dbt, S_KEYFIRST, 1, NULL, &exact); err: /* Discard the key and the page. 
*/ - if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0) + if ((t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0) ret = t_ret; return (ret); @@ -2122,7 +2784,8 @@ __bam_isopd(dbc, pgnop) if (TYPE(dbc->internal->page) != P_LBTREE) return (0); - bo = GET_BOVERFLOW(dbc->internal->page, dbc->internal->indx + O_INDX); + bo = GET_BOVERFLOW(dbc->dbp, + dbc->internal->page, dbc->internal->indx + O_INDX); if (B_TYPE(bo->type) == B_DUPLICATE) { *pgnop = bo->pgno; return (1); diff --git a/bdb/btree/bt_delete.c b/bdb/btree/bt_delete.c index 9725887882a..8c76ead2922 100644 --- a/bdb/btree/bt_delete.c +++ b/bdb/btree/bt_delete.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic Exp $"; +static const char revid[] = "$Id: bt_delete.c,v 11.44 2002/07/03 19:03:49 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,88 +53,10 @@ static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "lock.h" - -/* - * __bam_delete -- - * Delete the items referenced by a key. - * - * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__bam_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DBC *dbc; - DBT lkey; - DBT data; - u_int32_t f_init, f_next; - int ret, t_ret; - - PANIC_CHECK(dbp->dbenv); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); - DB_CHECK_TXN(dbp, txn); - - /* Check for invalid flags. */ - if ((ret = - __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - /* Allocate a cursor. 
*/ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags); - - /* - * Walk a cursor through the key/data pairs, deleting as we go. Set - * the DB_DBT_USERMEM flag, as this might be a threaded application - * and the flags checking will catch us. We don't actually want the - * keys or data, so request a partial of length 0. - */ - memset(&lkey, 0, sizeof(lkey)); - F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL); - memset(&data, 0, sizeof(data)); - F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL); - - /* - * If locking (and we haven't already acquired CDB locks), set the - * read-modify-write flag. - */ - f_init = DB_SET; - f_next = DB_NEXT_DUP; - if (STD_LOCKING(dbc)) { - f_init |= DB_RMW; - f_next |= DB_RMW; - } - - /* Walk through the set of key/data pairs, deleting as we go. */ - if ((ret = dbc->c_get(dbc, key, &data, f_init)) != 0) - goto err; - for (;;) { - if ((ret = dbc->c_del(dbc, 0)) != 0) - goto err; - if ((ret = dbc->c_get(dbc, &lkey, &data, f_next)) != 0) { - if (ret == DB_NOTFOUND) { - ret = 0; - break; - } - goto err; - } - } - -err: /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" /* * __bam_ditem -- @@ -151,14 +73,18 @@ __bam_ditem(dbc, h, indx) BINTERNAL *bi; BKEYDATA *bk; DB *dbp; + DB_MPOOLFILE *mpf; u_int32_t nbytes; int ret; + db_indx_t *inp; dbp = dbc->dbp; + mpf = dbp->mpf; + inp = P_INP(dbp, h); switch (TYPE(h)) { case P_IBTREE: - bi = GET_BINTERNAL(h, indx); + bi = GET_BINTERNAL(dbp, h, indx); switch (B_TYPE(bi->type)) { case B_DUPLICATE: case B_KEYDATA: @@ -171,7 +97,7 @@ __bam_ditem(dbc, h, indx) return (ret); break; default: - return (__db_pgfmt(dbp, PGNO(h))); + return (__db_pgfmt(dbp->dbenv, PGNO(h))); } break; case P_IRECNO: @@ -195,7 +121,7 @@ __bam_ditem(dbc, h, indx) * won't work! 
*/ if (indx + P_INDX < (u_int32_t)NUM_ENT(h) && - h->inp[indx] == h->inp[indx + P_INDX]) + inp[indx] == inp[indx + P_INDX]) return (__bam_adjindx(dbc, h, indx, indx + O_INDX, 0)); /* @@ -203,14 +129,14 @@ __bam_ditem(dbc, h, indx) * doesn't matter if we delete the key item before or * after the data item for the purposes of this one. */ - if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) + if (indx > 0 && inp[indx] == inp[indx - P_INDX]) return (__bam_adjindx(dbc, h, indx, indx - P_INDX, 0)); } /* FALLTHROUGH */ case P_LDUP: case P_LRECNO: - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); switch (B_TYPE(bk->type)) { case B_DUPLICATE: nbytes = BOVERFLOW_SIZE; @@ -218,24 +144,24 @@ __bam_ditem(dbc, h, indx) case B_OVERFLOW: nbytes = BOVERFLOW_SIZE; if ((ret = __db_doff( - dbc, (GET_BOVERFLOW(h, indx))->pgno)) != 0) + dbc, (GET_BOVERFLOW(dbp, h, indx))->pgno)) != 0) return (ret); break; case B_KEYDATA: nbytes = BKEYDATA_SIZE(bk->len); break; default: - return (__db_pgfmt(dbp, PGNO(h))); + return (__db_pgfmt(dbp->dbenv, PGNO(h))); } break; default: - return (__db_pgfmt(dbp, PGNO(h))); + return (__db_pgfmt(dbp->dbenv, PGNO(h))); } /* Delete the item and mark the page dirty. */ if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0) return (ret); - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0) return (ret); return (0); @@ -255,33 +181,37 @@ __bam_adjindx(dbc, h, indx, indx_copy, is_insert) int is_insert; { DB *dbp; - db_indx_t copy; + DB_MPOOLFILE *mpf; + db_indx_t copy, *inp; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; + inp = P_INP(dbp, h); /* Log the change. 
*/ - if (DB_LOGGING(dbc) && - (ret = __bam_adj_log(dbp->dbenv, dbc->txn, &LSN(h), - 0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy, - (u_int32_t)is_insert)) != 0) - return (ret); + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_adj_log(dbp, dbc->txn, &LSN(h), 0, + PGNO(h), &LSN(h), indx, indx_copy, (u_int32_t)is_insert)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); /* Shuffle the indices and mark the page dirty. */ if (is_insert) { - copy = h->inp[indx_copy]; + copy = inp[indx_copy]; if (indx != NUM_ENT(h)) - memmove(&h->inp[indx + O_INDX], &h->inp[indx], + memmove(&inp[indx + O_INDX], &inp[indx], sizeof(db_indx_t) * (NUM_ENT(h) - indx)); - h->inp[indx] = copy; + inp[indx] = copy; ++NUM_ENT(h); } else { --NUM_ENT(h); if (indx != NUM_ENT(h)) - memmove(&h->inp[indx], &h->inp[indx + O_INDX], + memmove(&inp[indx], &inp[indx + O_INDX], sizeof(db_indx_t) * (NUM_ENT(h) - indx)); } - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0) return (ret); return (0); @@ -303,6 +233,7 @@ __bam_dpages(dbc, stack_epg) DB *dbp; DBT a, b; DB_LOCK c_lock, p_lock; + DB_MPOOLFILE *mpf; EPG *epg; PAGE *child, *parent; db_indx_t nitems; @@ -311,6 +242,7 @@ __bam_dpages(dbc, stack_epg) int done, ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -328,8 +260,7 @@ __bam_dpages(dbc, stack_epg) */ ret = 0; for (epg = cp->sp; epg < stack_epg; ++epg) { - if ((t_ret = - memp_fput(dbp->mpf, epg->page, 0)) != 0 && ret == 0) + if ((t_ret = mpf->put(mpf, epg->page, 0)) != 0 && ret == 0) ret = t_ret; (void)__TLPUT(dbc, epg->lock); } @@ -364,7 +295,7 @@ __bam_dpages(dbc, stack_epg) pgno = PGNO(epg->page); nitems = NUM_ENT(epg->page); - if ((ret = memp_fput(dbp->mpf, epg->page, 0)) != 0) + if ((ret = mpf->put(mpf, epg->page, 0)) != 0) goto err_inc; (void)__TLPUT(dbc, epg->lock); @@ -394,7 +325,7 @@ __bam_dpages(dbc, stack_epg) err_inc: ++epg; err: for (; epg <= cp->csp; ++epg) { if (epg->page != 
NULL) - (void)memp_fput(dbp->mpf, epg->page, 0); + (void)mpf->put(mpf, epg->page, 0); (void)__TLPUT(dbc, epg->lock); } BT_STK_CLR(cp); @@ -415,14 +346,15 @@ err: for (; epg <= cp->csp; ++epg) { for (done = 0; !done;) { /* Initialize. */ parent = child = NULL; - p_lock.off = c_lock.off = LOCK_INVALID; + LOCK_INIT(p_lock); + LOCK_INIT(c_lock); /* Lock the root. */ pgno = root_pgno; if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &p_lock)) != 0) goto stop; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &parent)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &parent)) != 0) goto stop; if (NUM_ENT(parent) != 1) @@ -434,7 +366,7 @@ err: for (; epg <= cp->csp; ++epg) { * If this is overflow, then try to delete it. * The child may or may not still point at it. */ - bi = GET_BINTERNAL(parent, 0); + bi = GET_BINTERNAL(dbp, parent, 0); if (B_TYPE(bi->type) == B_OVERFLOW) if ((ret = __db_doff(dbc, ((BOVERFLOW *)bi->data)->pgno)) != 0) @@ -442,7 +374,7 @@ err: for (; epg <= cp->csp; ++epg) { pgno = bi->pgno; break; case P_IRECNO: - pgno = GET_RINTERNAL(parent, 0)->pgno; + pgno = GET_RINTERNAL(dbp, parent, 0)->pgno; break; default: goto stop; @@ -452,24 +384,24 @@ err: for (; epg <= cp->csp; ++epg) { if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &c_lock)) != 0) goto stop; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &child)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &child)) != 0) goto stop; /* Log the change. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { memset(&a, 0, sizeof(a)); a.data = child; a.size = dbp->pgsize; memset(&b, 0, sizeof(b)); - b.data = P_ENTRY(parent, 0); + b.data = P_ENTRY(dbp, parent, 0); b.size = TYPE(parent) == P_IRECNO ? 
RINTERNAL_SIZE : BINTERNAL_SIZE(((BINTERNAL *)b.data)->len); - if ((ret = - __bam_rsplit_log(dbp->dbenv, dbc->txn, &child->lsn, - 0, dbp->log_fileid, PGNO(child), &a, PGNO(parent), - RE_NREC(parent), &b, &parent->lsn)) != 0) + if ((ret = __bam_rsplit_log(dbp, dbc->txn, + &child->lsn, 0, PGNO(child), &a, PGNO(parent), + RE_NREC(parent), &b, &parent->lsn)) != 0) goto stop; - } + } else + LSN_NOT_LOGGED(child->lsn); /* * Make the switch. @@ -491,9 +423,9 @@ err: for (; epg <= cp->csp; ++epg) { RE_NREC_SET(parent, rcnt); /* Mark the pages dirty. */ - if ((ret = memp_fset(dbp->mpf, parent, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, parent, DB_MPOOL_DIRTY)) != 0) goto stop; - if ((ret = memp_fset(dbp->mpf, child, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, child, DB_MPOOL_DIRTY)) != 0) goto stop; /* Adjust the cursors. */ @@ -514,15 +446,13 @@ err: for (; epg <= cp->csp; ++epg) { if (0) { stop: done = 1; } - if (p_lock.off != LOCK_INVALID) - (void)__TLPUT(dbc, p_lock); + (void)__TLPUT(dbc, p_lock); if (parent != NULL && - (t_ret = memp_fput(dbp->mpf, parent, 0)) != 0 && ret == 0) + (t_ret = mpf->put(mpf, parent, 0)) != 0 && ret == 0) ret = t_ret; - if (c_lock.off != LOCK_INVALID) - (void)__TLPUT(dbc, c_lock); + (void)__TLPUT(dbc, c_lock); if (child != NULL && - (t_ret = memp_fput(dbp->mpf, child, 0)) != 0 && ret == 0) + (t_ret = mpf->put(mpf, child, 0)) != 0 && ret == 0) ret = t_ret; } diff --git a/bdb/btree/bt_method.c b/bdb/btree/bt_method.c index 5e3af27d033..aa27ed6bab9 100644 --- a/bdb/btree/bt_method.c +++ b/bdb/btree/bt_method.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell Exp $"; +static const char revid[] = "$Id: bt_method.c,v 11.29 2002/04/21 13:17:04 margo Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,9 +16,9 @@ static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "qam.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/qam.h" static int __bam_set_bt_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *))); @@ -82,7 +82,8 @@ __bam_db_close(dbp) { BTREE *t; - t = dbp->bt_internal; + if ((t = dbp->bt_internal) == NULL) + return (0); /* Recno */ /* Close any backing source file descriptor. */ if (t->re_fp != NULL) @@ -90,9 +91,9 @@ __bam_db_close(dbp) /* Free any backing source file name. */ if (t->re_source != NULL) - __os_freestr(t->re_source); + __os_free(dbp->dbenv, t->re_source); - __os_free(t, sizeof(BTREE)); + __os_free(dbp->dbenv, t); dbp->bt_internal = NULL; return (0); @@ -127,7 +128,7 @@ __bam_set_flags(dbp, flagsp) if (LF_ISSET(DB_DUP | DB_DUPSORT)) { /* DB_DUP/DB_DUPSORT is incompatible with DB_RECNUM. 
*/ - if (F_ISSET(dbp, DB_BT_RECNUM)) + if (F_ISSET(dbp, DB_AM_RECNUM)) goto incompat; if (LF_ISSET(DB_DUPSORT)) { @@ -145,12 +146,12 @@ __bam_set_flags(dbp, flagsp) if (F_ISSET(dbp, DB_AM_DUP)) goto incompat; - F_SET(dbp, DB_BT_RECNUM); + F_SET(dbp, DB_AM_RECNUM); LF_CLR(DB_RECNUM); } if (LF_ISSET(DB_REVSPLITOFF)) { - F_SET(dbp, DB_BT_REVSPLIT); + F_SET(dbp, DB_AM_REVSPLITOFF); LF_CLR(DB_REVSPLITOFF); } @@ -279,12 +280,12 @@ __ram_set_flags(dbp, flagsp) DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO); if (LF_ISSET(DB_RENUMBER)) { - F_SET(dbp, DB_RE_RENUMBER); + F_SET(dbp, DB_AM_RENUMBER); LF_CLR(DB_RENUMBER); } if (LF_ISSET(DB_SNAPSHOT)) { - F_SET(dbp, DB_RE_SNAPSHOT); + F_SET(dbp, DB_AM_SNAPSHOT); LF_CLR(DB_SNAPSHOT); } @@ -310,7 +311,7 @@ __ram_set_re_delim(dbp, re_delim) t = dbp->bt_internal; t->re_delim = re_delim; - F_SET(dbp, DB_RE_DELIMITER); + F_SET(dbp, DB_AM_DELIMITER); return (0); } @@ -336,7 +337,7 @@ __ram_set_re_len(dbp, re_len) q = dbp->q_internal; q->re_len = re_len; - F_SET(dbp, DB_RE_FIXEDLEN); + F_SET(dbp, DB_AM_FIXEDLEN); return (0); } @@ -362,7 +363,7 @@ __ram_set_re_pad(dbp, re_pad) q = dbp->q_internal; q->re_pad = re_pad; - F_SET(dbp, DB_RE_PAD); + F_SET(dbp, DB_AM_PAD); return (0); } diff --git a/bdb/btree/bt_open.c b/bdb/btree/bt_open.c index 405c1880f5e..0b72391c267 100644 --- a/bdb/btree/bt_open.c +++ b/bdb/btree/bt_open.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Exp $"; +static const char revid[] = "$Id: bt_open.c,v 11.76 2002/09/04 19:06:42 margo Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -54,33 +54,38 @@ static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Ex #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "btree.h" -#include "db_shash.h" -#include "lock.h" -#include "log.h" -#include "mp.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/btree.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/fop.h" + +static void __bam_init_meta __P((DB *, BTMETA *, db_pgno_t, DB_LSN *)); /* * __bam_open -- * Open a btree. * - * PUBLIC: int __bam_open __P((DB *, const char *, db_pgno_t, u_int32_t)); + * PUBLIC: int __bam_open __P((DB *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t)); */ int -__bam_open(dbp, name, base_pgno, flags) +__bam_open(dbp, txn, name, base_pgno, flags) DB *dbp; + DB_TXN *txn; const char *name; db_pgno_t base_pgno; u_int32_t flags; { BTREE *t; + COMPQUIET(name, NULL); t = dbp->bt_internal; /* Initialize the remaining fields/methods of the DB. */ - dbp->del = __bam_delete; dbp->key_range = __bam_key_range; dbp->stat = __bam_stat; @@ -99,8 +104,8 @@ __bam_open(dbp, name, base_pgno, flags) * Verify that the bt_minkey value specified won't cause the * calculation of ovflsize to underflow [#2406] for this pagesize. 
*/ - if (B_MINKEY_TO_OVFLSIZE(t->bt_minkey, dbp->pgsize) > - B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) { + if (B_MINKEY_TO_OVFLSIZE(dbp, t->bt_minkey, dbp->pgsize) > + B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) { __db_err(dbp->dbenv, "bt_minkey value of %lu too high for page size of %lu", (u_long)t->bt_minkey, (u_long)dbp->pgsize); @@ -108,7 +113,7 @@ __bam_open(dbp, name, base_pgno, flags) } /* Start up the tree. */ - return (__bam_read_root(dbp, name, base_pgno, flags)); + return (__bam_read_root(dbp, txn, base_pgno, flags)); } /* @@ -143,6 +148,7 @@ __bam_metachk(dbp, name, btm) name, (u_long)vers); return (DB_OLD_VERSION); case 8: + case 9: break; default: __db_err(dbenv, @@ -187,13 +193,13 @@ __bam_metachk(dbp, name, btm) if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) { if (dbp->type != DB_BTREE) goto wrong_type; - F_SET(dbp, DB_BT_RECNUM); + F_SET(dbp, DB_AM_RECNUM); if ((ret = __db_fcchk(dbenv, - "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0) + "DB->open", dbp->flags, DB_AM_DUP, DB_AM_RECNUM)) != 0) return (ret); } else - if (F_ISSET(dbp, DB_BT_RECNUM)) { + if (F_ISSET(dbp, DB_AM_RECNUM)) { __db_err(dbenv, "%s: DB_RECNUM specified to open method but not set in database", name); @@ -203,9 +209,9 @@ __bam_metachk(dbp, name, btm) if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) { if (dbp->type != DB_RECNO) goto wrong_type; - F_SET(dbp, DB_RE_FIXEDLEN); + F_SET(dbp, DB_AM_FIXEDLEN); } else - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) { __db_err(dbenv, "%s: DB_FIXEDLEN specified to open method but not set in database", name); @@ -215,9 +221,9 @@ __bam_metachk(dbp, name, btm) if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) { if (dbp->type != DB_RECNO) goto wrong_type; - F_SET(dbp, DB_RE_RENUMBER); + F_SET(dbp, DB_AM_RENUMBER); } else - if (F_ISSET(dbp, DB_RE_RENUMBER)) { + if (F_ISSET(dbp, DB_AM_RENUMBER)) { __db_err(dbenv, "%s: DB_RENUMBER specified to open method but not set in database", name); @@ -266,116 +272,129 @@ 
wrong_type: /* * __bam_read_root -- - * Check (and optionally create) a tree. + * Read the root page and check a tree. * - * PUBLIC: int __bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t)); + * PUBLIC: int __bam_read_root __P((DB *, DB_TXN *, db_pgno_t, u_int32_t)); */ int -__bam_read_root(dbp, name, base_pgno, flags) +__bam_read_root(dbp, txn, base_pgno, flags) DB *dbp; - const char *name; + DB_TXN *txn; db_pgno_t base_pgno; u_int32_t flags; { BTMETA *meta; BTREE *t; DBC *dbc; - DB_LSN orig_lsn; DB_LOCK metalock; - PAGE *root; - int locked, ret, t_ret; + DB_MPOOLFILE *mpf; + int ret, t_ret; - ret = 0; - t = dbp->bt_internal; meta = NULL; - root = NULL; - locked = 0; + t = dbp->bt_internal; + LOCK_INIT(metalock); + mpf = dbp->mpf; + ret = 0; - /* - * Get a cursor. If DB_CREATE is specified, we may be creating - * the root page, and to do that safely in CDB we need a write - * cursor. In STD_LOCKING mode, we'll synchronize using the - * meta page lock instead. - */ - if ((ret = dbp->cursor(dbp, dbp->open_txn, - &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbp->dbenv) ? - DB_WRITECURSOR : 0)) != 0) + /* Get a cursor. */ + if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) return (ret); - /* Get, and optionally create the metadata page. */ + /* Get the metadata page. */ if ((ret = __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0) goto err; - if ((ret = memp_fget( - dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0) + if ((ret = mpf->get(mpf, &base_pgno, 0, (PAGE **)&meta)) != 0) goto err; /* - * If the magic number is correct, we're not creating the tree. - * Correct any fields that may not be right. Note, all of the - * local flags were set by DB->open. + * If the magic number is set, the tree has been created. Correct + * any fields that may not be right. Note, all of the local flags + * were set by DB->open. 
+ * + * Otherwise, we'd better be in recovery or abort, in which case the + * metadata page will be created/initialized elsewhere. */ -again: if (meta->dbmeta.magic != 0) { - t->bt_maxkey = meta->maxkey; - t->bt_minkey = meta->minkey; - t->re_pad = meta->re_pad; - t->re_len = meta->re_len; - - t->bt_meta = base_pgno; - t->bt_root = meta->root; - - (void)memp_fput(dbp->mpf, meta, 0); - meta = NULL; - goto done; - } + DB_ASSERT(meta->dbmeta.magic != 0 || + IS_RECOVERING(dbp->dbenv) || F_ISSET(dbp, DB_AM_RECOVER)); - /* In recovery if it's not there it will be created elsewhere.*/ - if (IS_RECOVERING(dbp->dbenv)) - goto done; - - /* If we're doing CDB; we now have to get the write lock. */ - if (CDB_LOCKING(dbp->dbenv)) { - /* - * We'd better have DB_CREATE set if we're actually doing - * the create. - */ - DB_ASSERT(LF_ISSET(DB_CREATE)); - if ((ret = lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE, - &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0) - goto err; - } + t->bt_maxkey = meta->maxkey; + t->bt_minkey = meta->minkey; + t->re_pad = meta->re_pad; + t->re_len = meta->re_len; + + t->bt_meta = base_pgno; + t->bt_root = meta->root; /* - * If we are doing locking, relase the read lock and get a write lock. - * We want to avoid deadlock. + * !!! + * If creating a subdatabase, we've already done an insert when + * we put the subdatabase's entry into the master database, so + * our last-page-inserted value is wrongly initialized for the + * master database, not the subdatabase we're creating. I'm not + * sure where the *right* place to clear this value is, it's not + * intuitively obvious that it belongs here. */ - if (locked == 0 && STD_LOCKING(dbc)) { - if ((ret = __LPUT(dbc, metalock)) != 0) - goto err; - if ((ret = __db_lget(dbc, - 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - locked = 1; - goto again; - } + t->bt_lpgno = PGNO_INVALID; + + /* We must initialize last_pgno, it could be stale. 
*/ + if (!LF_ISSET(DB_RDONLY) && dbp->meta_pgno == PGNO_BASE_MD) { + mpf->last_pgno(mpf, &meta->dbmeta.last_pgno); + ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY); + } else + ret = mpf->put(mpf, meta, 0); + meta = NULL; + +err: /* Put the metadata page back. */ + if (meta != NULL && (t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __bam_init_meta -- + * + * Initialize a btree meta-data page. The following fields may need + * to be updated later: last_pgno, root. + */ +static void +__bam_init_meta(dbp, meta, pgno, lsnp) + DB *dbp; + BTMETA *meta; + db_pgno_t pgno; + DB_LSN *lsnp; +{ + BTREE *t; - /* Initialize the tree structure metadata information. */ - orig_lsn = meta->dbmeta.lsn; memset(meta, 0, sizeof(BTMETA)); - meta->dbmeta.lsn = orig_lsn; - meta->dbmeta.pgno = base_pgno; + meta->dbmeta.lsn = *lsnp; + meta->dbmeta.pgno = pgno; meta->dbmeta.magic = DB_BTREEMAGIC; meta->dbmeta.version = DB_BTREEVERSION; meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = + ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg; + DB_ASSERT(meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } meta->dbmeta.type = P_BTREEMETA; meta->dbmeta.free = PGNO_INVALID; + meta->dbmeta.last_pgno = pgno; if (F_ISSET(dbp, DB_AM_DUP)) F_SET(&meta->dbmeta, BTM_DUP); - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) F_SET(&meta->dbmeta, BTM_FIXEDLEN); - if (F_ISSET(dbp, DB_BT_RECNUM)) + if (F_ISSET(dbp, DB_AM_RECNUM)) F_SET(&meta->dbmeta, BTM_RECNUM); - if (F_ISSET(dbp, DB_RE_RENUMBER)) + if (F_ISSET(dbp, DB_AM_RENUMBER)) F_SET(&meta->dbmeta, BTM_RENUMBER); if (F_ISSET(dbp, DB_AM_SUBDB)) F_SET(&meta->dbmeta, BTM_SUBDB); @@ -385,14 +404,165 @@ 
again: if (meta->dbmeta.magic != 0) { F_SET(&meta->dbmeta, BTM_RECNO); memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + t = dbp->bt_internal; meta->maxkey = t->bt_maxkey; meta->minkey = t->bt_minkey; meta->re_len = t->re_len; meta->re_pad = t->re_pad; +} - /* If necessary, log the meta-data and root page creates. */ - if ((ret = __db_log_page(dbp, - name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0) +/* + * __bam_new_file -- + * Create the necessary pages to begin a new database file. + * + * This code appears more complex than it is because of the two cases (named + * and unnamed). The way to read the code is that for each page being created, + * there are three parts: 1) a "get page" chunk (which either uses malloc'd + * memory or calls mpf->get), 2) the initialization, and 3) the "put page" + * chunk which either does a fop write or an mpf->put. + * + * PUBLIC: int __bam_new_file __P((DB *, DB_TXN *, DB_FH *, const char *)); + */ +int +__bam_new_file(dbp, txn, fhp, name) + DB *dbp; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + BTMETA *meta; + DB_ENV *dbenv; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + DBT pdbt; + PAGE *root; + db_pgno_t pgno; + int ret; + void *buf; + + dbenv = dbp->dbenv; + mpf = dbp->mpf; + root = NULL; + meta = NULL; + memset(&pdbt, 0, sizeof(pdbt)); + + /* Build meta-data page. 
*/ + + if (name == NULL) { + pgno = PGNO_BASE_MD; + ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &meta); + } else { + pginfo.db_pagesize = dbp->pgsize; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pginfo.type = dbp->type; + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf); + meta = (BTMETA *)buf; + } + if (ret != 0) + return (ret); + + LSN_NOT_LOGGED(lsn); + __bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); + meta->root = 1; + meta->dbmeta.last_pgno = 1; + + if (name == NULL) + ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY); + else { + if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + ret = __fop_write(dbenv, + txn, name, DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1); + } + if (ret != 0) + goto err; + meta = NULL; + + /* Now build root page. */ + if (name == NULL) { + pgno = 1; + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0) + goto err; + } else { +#ifdef DIAGNOSTIC + memset(buf, dbp->pgsize, 0); +#endif + root = (PAGE *)buf; + } + + P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID, + LEAFLEVEL, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE); + LSN_NOT_LOGGED(root->lsn); + + if (name == NULL) + ret = mpf->put(mpf, root, DB_MPOOL_DIRTY); + else { + if ((ret = __db_pgout(dbenv, root->pgno, root, &pdbt)) != 0) + goto err; + ret = __fop_write(dbenv, txn, + name, DB_APP_DATA, fhp, dbp->pgsize, buf, dbp->pgsize, 1); + } + if (ret != 0) + goto err; + root = NULL; + +err: if (name != NULL) + __os_free(dbenv, buf); + else { + if (meta != NULL) + (void)mpf->put(mpf, meta, 0); + if (root != NULL) + (void)mpf->put(mpf, root, 0); + } + return (ret); +} + +/* + * __bam_new_subdb -- + * Create a metadata page and a root page for a new btree. 
+ * + * PUBLIC: int __bam_new_subdb __P((DB *, DB *, DB_TXN *)); + */ +int +__bam_new_subdb(mdbp, dbp, txn) + DB *mdbp, *dbp; + DB_TXN *txn; +{ + BTMETA *meta; + DBC *dbc; + DB_ENV *dbenv; + DB_LOCK metalock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; + PAGE *root; + int ret, t_ret; + + dbenv = mdbp->dbenv; + mpf = mdbp->mpf; + dbc = NULL; + meta = NULL; + root = NULL; + + if ((ret = mdbp->cursor(mdbp, txn, + &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0) + return (ret); + + /* Get, and optionally create the metadata page. */ + if ((ret = __db_lget(dbc, + 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0) + goto err; + + /* Build meta-data page. */ + lsn = meta->dbmeta.lsn; + __bam_init_meta(dbp, meta, dbp->meta_pgno, &lsn); + if ((ret = __db_log_page(mdbp, + txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) goto err; /* Create and initialize a root page. */ @@ -401,68 +571,35 @@ again: if (meta->dbmeta.magic != 0) { goto err; root->level = LEAFLEVEL; - if (dbp->open_txn != NULL && (ret = __bam_root_log(dbp->dbenv, - dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid, + if (DBENV_LOGGING(dbenv) && + (ret = __bam_root_log(mdbp, txn, &meta->dbmeta.lsn, 0, meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0) goto err; meta->root = root->pgno; - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name); - if ((ret = __db_log_page(dbp, - name, &root->lsn, root->pgno, root)) != 0) + if ((ret = + __db_log_page(mdbp, txn, &root->lsn, root->pgno, root)) != 0) goto err; - DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); - - t->bt_meta = base_pgno; - t->bt_root = root->pgno; /* Release the metadata and root pages. 
*/ - if ((ret = memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0) goto err; meta = NULL; - if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, root, DB_MPOOL_DIRTY)) != 0) goto err; root = NULL; - - /* - * Flush the metadata and root pages to disk. - * - * !!! - * It's not useful to return not-yet-flushed here -- convert it to - * an error. - */ - if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) { - __db_err(dbp->dbenv, "Metapage flush failed"); - ret = EINVAL; - } - DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); - -done: /* - * !!! - * We already did an insert and so the last-page-inserted has been - * set. I'm not sure where the *right* place to clear this value - * is, it's not intuitively obvious that it belongs here. - */ - t->bt_lpgno = PGNO_INVALID; - err: -DB_TEST_RECOVERY_LABEL - /* Put any remaining pages back. */ if (meta != NULL) - if ((t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 && - ret == 0) + if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0) ret = t_ret; if (root != NULL) - if ((t_ret = memp_fput(dbp->mpf, root, 0)) != 0 && - ret == 0) + if ((t_ret = mpf->put(mpf, root, 0)) != 0 && ret == 0) + ret = t_ret; + if (LOCK_ISSET(metalock)) + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + if (dbc != NULL) + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) ret = t_ret; - - /* We can release the metapage lock when we are done. */ - if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; return (ret); } diff --git a/bdb/btree/bt_put.c b/bdb/btree/bt_put.c index 19a04526d1b..39bd2024e76 100644 --- a/bdb/btree/bt_put.c +++ b/bdb/btree/bt_put.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Exp $"; +static const char revid[] = "$Id: bt_put.c,v 11.69 2002/08/06 06:11:12 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,12 +53,16 @@ static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Ex #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +static int __bam_build + __P((DBC *, u_int32_t, DBT *, PAGE *, u_int32_t, u_int32_t)); static int __bam_dup_convert __P((DBC *, PAGE *, u_int32_t)); static int __bam_ovput __P((DBC *, u_int32_t, db_pgno_t, PAGE *, u_int32_t, DBT *)); +static u_int32_t + __bam_partsize __P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t)); /* * __bam_iitem -- @@ -77,6 +81,7 @@ __bam_iitem(dbc, key, data, op, flags) BTREE_CURSOR *cp; DB *dbp; DBT bk_hdr, tdbt; + DB_MPOOLFILE *mpf; PAGE *h; db_indx_t indx; u_int32_t data_size, have_bytes, need_bytes, needed; @@ -85,6 +90,7 @@ __bam_iitem(dbc, key, data, op, flags) COMPQUIET(bk, NULL); dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; t = dbp->bt_internal; h = cp->page; @@ -95,7 +101,7 @@ __bam_iitem(dbc, key, data, op, flags) * Fixed-length records with partial puts: it's an error to specify * anything other simple overwrite. */ - if (F_ISSET(dbp, DB_RE_FIXEDLEN) && + if (F_ISSET(dbp, DB_AM_FIXEDLEN) && F_ISSET(data, DB_DBT_PARTIAL) && data->dlen != data->size) { data_size = data->size; goto len_err; @@ -110,16 +116,18 @@ __bam_iitem(dbc, key, data, op, flags) * the fixed-length record size. */ data_size = F_ISSET(data, DB_DBT_PARTIAL) ? 
- __bam_partsize(op, data, h, indx) : data->size; + __bam_partsize(dbp, op, data, h, indx) : data->size; padrec = 0; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) { if (data_size > t->re_len) { len_err: __db_err(dbp->dbenv, "Length improper for fixed length record %lu", (u_long)data_size); return (EINVAL); } - if (data_size < t->re_len) { + + /* Records that are deleted anyway needn't be padded out. */ + if (!LF_ISSET(BI_DELETED) && data_size < t->re_len) { padrec = 1; data_size = t->re_len; } @@ -146,8 +154,8 @@ len_err: __db_err(dbp->dbenv, */ if (op == DB_CURRENT && dbp->dup_compare != NULL) { if ((ret = __bam_cmp(dbp, data, h, - indx + (TYPE(h) == P_LBTREE ? O_INDX : 0), - dbp->dup_compare, &cmp)) != 0) + indx + (TYPE(h) == P_LBTREE ? O_INDX : 0), + dbp->dup_compare, &cmp)) != 0) return (ret); if (cmp != 0) { __db_err(dbp->dbenv, @@ -190,7 +198,7 @@ len_err: __db_err(dbp->dbenv, */ bigkey = 0; if (op == DB_CURRENT) { - bk = GET_BKEYDATA(h, + bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); if (B_TYPE(bk->type) == B_KEYDATA) have_bytes = BKEYDATA_PSIZE(bk->len); @@ -221,7 +229,7 @@ len_err: __db_err(dbp->dbenv, * The t->bt_maxkey test here may be insufficient -- do we have to * check in the btree split code, so we don't undo it there!?!? */ - if (P_FREESPACE(h) < needed || + if (P_FREESPACE(dbp, h) < needed || (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) return (DB_NEEDSPLIT); @@ -328,6 +336,11 @@ len_err: __db_err(dbp->dbenv, /* Add the data. */ if (bigdata) { + /* + * We do not have to handle deleted (BI_DELETED) records + * in this case; the actual records should never be created. 
+ */ + DB_ASSERT(!LF_ISSET(BI_DELETED)); if ((ret = __bam_ovput(dbc, B_OVERFLOW, PGNO_INVALID, h, indx, data)) != 0) return (ret); @@ -347,7 +360,7 @@ len_err: __db_err(dbp->dbenv, if (ret != 0) return (ret); } - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0) return (ret); /* @@ -375,7 +388,7 @@ len_err: __db_err(dbp->dbenv, * up at least 25% of the space on the page. If it does, move it onto * its own page. */ - if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) { + if (dupadjust && P_FREESPACE(dbp, h) <= dbp->pgsize / 2) { if ((ret = __bam_dup_convert(dbc, h, indx - O_INDX)) != 0) return (ret); } @@ -390,11 +403,10 @@ len_err: __db_err(dbp->dbenv, /* * __bam_partsize -- * Figure out how much space a partial data item is in total. - * - * PUBLIC: u_int32_t __bam_partsize __P((u_int32_t, DBT *, PAGE *, u_int32_t)); */ -u_int32_t -__bam_partsize(op, data, h, indx) +static u_int32_t +__bam_partsize(dbp, op, data, h, indx) + DB *dbp; u_int32_t op, indx; DBT *data; PAGE *h; @@ -413,38 +425,18 @@ __bam_partsize(op, data, h, indx) * Otherwise, it's the data provided plus any already existing data * that we're not replacing. */ - bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); + bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); nbytes = B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len; - /* - * There are really two cases here: - * - * Case 1: We are replacing some bytes that do not exist (i.e., they - * are past the end of the record). In this case the number of bytes - * we are replacing is irrelevant and all we care about is how many - * bytes we are going to add from offset. So, the new record length - * is going to be the size of the new bytes (size) plus wherever those - * new bytes begin (doff). - * - * Case 2: All the bytes we are replacing exist. 
Therefore, the new - * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) - * plus the bytes we are adding (size). - */ - if (nbytes < data->doff + data->dlen) /* Case 1 */ - return (data->doff + data->size); - - return (nbytes + data->size - data->dlen); /* Case 2 */ + return (__db_partsize(nbytes, data)); } /* * __bam_build -- * Build the real record for a partial put, or short fixed-length record. - * - * PUBLIC: int __bam_build __P((DBC *, u_int32_t, - * PUBLIC: DBT *, PAGE *, u_int32_t, u_int32_t)); */ -int +static int __bam_build(dbc, op, dbt, h, indx, nbytes) DBC *dbc; u_int32_t op, indx, nbytes; @@ -454,9 +446,8 @@ __bam_build(dbc, op, dbt, h, indx, nbytes) BKEYDATA *bk, tbk; BOVERFLOW *bo; BTREE *t; - BTREE_CURSOR *cp; DB *dbp; - DBT copy; + DBT copy, *rdata; u_int32_t len, tlen; u_int8_t *p; int ret; @@ -464,26 +455,26 @@ __bam_build(dbc, op, dbt, h, indx, nbytes) COMPQUIET(bo, NULL); dbp = dbc->dbp; - cp = (BTREE_CURSOR *) dbc->internal; t = dbp->bt_internal; /* We use the record data return memory, it's only a short-term use. */ - if (dbc->rdata.ulen < nbytes) { + rdata = &dbc->my_rdata; + if (rdata->ulen < nbytes) { if ((ret = __os_realloc(dbp->dbenv, - nbytes, NULL, &dbc->rdata.data)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; + nbytes, &rdata->data)) != 0) { + rdata->ulen = 0; + rdata->data = NULL; return (ret); } - dbc->rdata.ulen = nbytes; + rdata->ulen = nbytes; } /* * We use nul or pad bytes for any part of the record that isn't * specified; get it over with. */ - memset(dbc->rdata.data, - F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_pad : 0, nbytes); + memset(rdata->data, + F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_pad : 0, nbytes); /* * In the next clauses, we need to do three things: a) set p to point @@ -495,14 +486,15 @@ __bam_build(dbc, op, dbt, h, indx, nbytes) * the chase. 
*/ if (!F_ISSET(dbt, DB_DBT_PARTIAL) || op != DB_CURRENT) { - p = (u_int8_t *)dbc->rdata.data + dbt->doff; + p = (u_int8_t *)rdata->data + dbt->doff; tlen = dbt->doff; goto user_copy; } /* Find the current record. */ if (indx < NUM_ENT(h)) { - bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); + bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? + O_INDX : 0)); bo = (BOVERFLOW *)bk; } else { bk = &tbk; @@ -516,12 +508,12 @@ __bam_build(dbc, op, dbt, h, indx, nbytes) */ memset(&copy, 0, sizeof(copy)); if ((ret = __db_goff(dbp, &copy, bo->tlen, - bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + bo->pgno, &rdata->data, &rdata->ulen)) != 0) return (ret); /* Skip any leading data from the original record. */ tlen = dbt->doff; - p = (u_int8_t *)dbc->rdata.data + dbt->doff; + p = (u_int8_t *)rdata->data + dbt->doff; /* * Copy in any trailing data from the original record. @@ -542,10 +534,10 @@ __bam_build(dbc, op, dbt, h, indx, nbytes) } } else { /* Copy in any leading data from the original record. */ - memcpy(dbc->rdata.data, + memcpy(rdata->data, bk->data, dbt->doff > bk->len ? bk->len : dbt->doff); tlen = dbt->doff; - p = (u_int8_t *)dbc->rdata.data + dbt->doff; + p = (u_int8_t *)rdata->data + dbt->doff; /* Copy in any trailing data from the original record. */ len = dbt->doff + dbt->dlen; @@ -564,11 +556,11 @@ user_copy: tlen += dbt->size; /* Set the DBT to reference our new record. */ - dbc->rdata.size = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : tlen; - dbc->rdata.dlen = 0; - dbc->rdata.doff = 0; - dbc->rdata.flags = 0; - *dbt = dbc->rdata; + rdata->size = F_ISSET(dbp, DB_AM_FIXEDLEN) ? 
t->re_len : tlen; + rdata->dlen = 0; + rdata->doff = 0; + rdata->flags = 0; + *dbt = *rdata; return (0); } @@ -591,6 +583,7 @@ __bam_ritem(dbc, h, indx, data) db_indx_t cnt, lo, ln, min, off, prefix, suffix; int32_t nbytes; int ret; + db_indx_t *inp; u_int8_t *p, *t; dbp = dbc->dbp; @@ -600,10 +593,10 @@ __bam_ritem(dbc, h, indx, data) * to insert and whether it fits is handled in the caller. All we do * here is manage the page shuffling. */ - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); /* Log the change. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { /* * We might as well check to see if the two data items share * a common prefix and suffix -- it can save us a lot of log @@ -627,17 +620,18 @@ __bam_ritem(dbc, h, indx, data) orig.size = bk->len - (prefix + suffix); repl.data = (u_int8_t *)data->data + prefix; repl.size = data->size - (prefix + suffix); - if ((ret = __bam_repl_log(dbp->dbenv, dbc->txn, - &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), - (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), + if ((ret = __bam_repl_log(dbp, dbc->txn, &LSN(h), 0, PGNO(h), + &LSN(h), (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) return (ret); - } + } else + LSN_NOT_LOGGED(LSN(h)); /* * Set references to the first in-use byte on the page and the * first byte of the item being replaced. */ + inp = P_INP(dbp, h); p = (u_int8_t *)h + HOFFSET(h); t = (u_int8_t *)bk; @@ -648,19 +642,19 @@ __bam_ritem(dbc, h, indx, data) * the regions overlap. */ lo = BKEYDATA_SIZE(bk->len); - ln = BKEYDATA_SIZE(data->size); + ln = (db_indx_t)BKEYDATA_SIZE(data->size); if (lo != ln) { nbytes = lo - ln; /* Signed difference. */ if (p == t) /* First index is fast. */ - h->inp[indx] += nbytes; + inp[indx] += nbytes; else { /* Else, shift the page. */ memmove(p + nbytes, p, t - p); /* Adjust the indices' offsets. 
*/ - off = h->inp[indx]; + off = inp[indx]; for (cnt = 0; cnt < NUM_ENT(h); ++cnt) - if (h->inp[cnt] <= off) - h->inp[cnt] += nbytes; + if (inp[cnt] <= off) + inp[cnt] += nbytes; } /* Clean up the page and adjust the item's reference. */ @@ -688,30 +682,31 @@ __bam_dup_convert(dbc, h, indx) PAGE *h; u_int32_t indx; { - BTREE_CURSOR *cp; BKEYDATA *bk; DB *dbp; DBT hdr; + DB_MPOOLFILE *mpf; PAGE *dp; - db_indx_t cnt, cpindx, dindx, first, sz; + db_indx_t cnt, cpindx, dindx, first, *inp, sz; int ret; dbp = dbc->dbp; - cp = (BTREE_CURSOR *)dbc->internal; + mpf = dbp->mpf; + inp = P_INP(dbp, h); /* * Count the duplicate records and calculate how much room they're * using on the page. */ - while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) + while (indx > 0 && inp[indx] == inp[indx - P_INDX]) indx -= P_INDX; for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) { - if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx]) + if (indx >= NUM_ENT(h) || inp[first] != inp[indx]) break; - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); sz += B_TYPE(bk->type) == B_KEYDATA ? BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; - bk = GET_BKEYDATA(h, indx + O_INDX); + bk = GET_BKEYDATA(dbp, h, indx + O_INDX); sz += B_TYPE(bk->type) == B_KEYDATA ? BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; } @@ -766,7 +761,7 @@ __bam_dup_convert(dbc, h, indx) * deleted entries are discarded (if the deleted entry is * overflow, then free up those pages). */ - bk = GET_BKEYDATA(h, dindx + 1); + bk = GET_BKEYDATA(dbp, h, dindx + 1); hdr.data = bk; hdr.size = B_TYPE(bk->type) == B_KEYDATA ? 
BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE; @@ -778,7 +773,7 @@ __bam_dup_convert(dbc, h, indx) */ if (B_TYPE(bk->type) == B_OVERFLOW && (ret = __db_doff(dbc, - (GET_BOVERFLOW(h, dindx + 1))->pgno)) != 0) + (GET_BOVERFLOW(dbp, h, dindx + 1))->pgno)) != 0) goto err; } else { if ((ret = __db_pitem( @@ -802,7 +797,7 @@ __bam_dup_convert(dbc, h, indx) /* Put in a new data item that points to the duplicates page. */ if ((ret = __bam_ovput(dbc, - B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0) + B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0) goto err; /* Adjust cursors for all the above movments. */ @@ -810,9 +805,9 @@ __bam_dup_convert(dbc, h, indx) PGNO(h), first + P_INDX, first + P_INDX - indx)) != 0) goto err; - return (memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY)); + return (mpf->put(mpf, dp, DB_MPOOL_DIRTY)); -err: (void)__db_free(dbc, dp); +err: (void)mpf->put(mpf, dp, 0); return (ret); } diff --git a/bdb/btree/bt_rec.c b/bdb/btree/bt_rec.c index 24dc9bc6a6e..b6443547aa5 100644 --- a/bdb/btree/bt_rec.c +++ b/bdb/btree/bt_rec.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp $"; +static const char revid[] = "$Id: bt_rec.c,v 11.57 2002/08/06 16:53:53 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,287 +18,17 @@ static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp #endif #include "db_int.h" -#include "db_page.h" -#include "hash.h" -#include "btree.h" -#include "log.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" #define IS_BTREE_PAGE(pagep) \ (TYPE(pagep) == P_IBTREE || \ TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP) /* - * __bam_pg_alloc_recover -- - * Recovery function for pg_alloc. - * - * PUBLIC: int __bam_pg_alloc_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__bam_pg_alloc_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __bam_pg_alloc_args *argp; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, level, modified, ret; - - REC_PRINT(__bam_pg_alloc_print); - REC_INTRO(__bam_pg_alloc_read, 0); - - /* - * Fix up the allocated page. If we're redoing the operation, we have - * to get the page (creating it if it doesn't exist), and update its - * LSN. If we're undoing the operation, we have to reset the page's - * LSN and put it on the free list. - * - * Fix up the metadata page. If we're redoing the operation, we have - * to get the metadata page and update its LSN and its free pointer. - * If we're undoing the operation and the page was ever created, we put - * it on the freelist. - */ - pgno = PGNO_BASE_MD; - meta = NULL; - if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) { - /* The metadata page must always exist on redo. 
*/ - if (DB_REDO(op)) { - (void)__db_pgerr(file_dbp, pgno); - goto out; - } else - goto done; - } - if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) { - /* - * We specify creation and check for it later, because this - * operation was supposed to create the page, and even in - * the undo case it's going to get linked onto the freelist - * which we're also fixing up. - */ - (void)__db_pgerr(file_dbp, argp->pgno); - goto err; - } - - /* Fix up the allocated page. */ - modified = 0; - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->page_lsn); - - /* - * If an inital allocation is aborted and then reallocated - * during an archival restore the log record will have - * an LSN for the page but the page will be empty. - */ - if (IS_ZERO_LSN(LSN(pagep))) - cmp_p = 0; - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->page_lsn); - /* - * If we we rolled back this allocation previously during an - * archive restore, the page may have the LSN of the meta page - * at the point of the roll back. This will be no more - * than the LSN of the metadata page at the time of this allocation. - */ - if (DB_REDO(op) && - (cmp_p == 0 || - (IS_ZERO_LSN(argp->page_lsn) && - log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) { - /* Need to redo update described. */ - switch (argp->ptype) { - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - level = LEAFLEVEL; - break; - default: - level = 0; - break; - } - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); - - pagep->lsn = *lsnp; - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* - * Undo the allocation, reinitialize the page and - * link its next pointer to the free list. - */ - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - - pagep->lsn = argp->page_lsn; - modified = 1; - } - - if ((ret = memp_fput(mpf, pagep, modified ? 
DB_MPOOL_DIRTY : 0)) != 0) { - goto err; - } - - /* - * If the page was newly created, put it on the limbo list. - */ - if (IS_ZERO_LSN(LSN(pagep)) && - IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { - /* Put the page in limbo.*/ - if ((ret = __db_add_limbo(dbenv, - info, argp->fileid, argp->pgno, 1)) != 0) - goto err; - } - - /* Fix up the metadata page. */ - modified = 0; - cmp_n = log_compare(lsnp, &LSN(meta)); - cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - LSN(meta) = *lsnp; - meta->free = argp->next; - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - LSN(meta) = argp->meta_lsn; - - /* - * If the page has a zero LSN then its newly created - * and will go into limbo rather than directly on the - * free list. - */ - if (!IS_ZERO_LSN(argp->page_lsn)) - meta->free = argp->pgno; - modified = 1; - } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - /* - * This could be the metapage from a subdb which is read from disk - * to recover its creation. - */ - if (F_ISSET(file_dbp, DB_AM_SUBDB)) - switch (argp->type) { - case P_BTREEMETA: - case P_HASHMETA: - case P_QAMMETA: - file_dbp->sync(file_dbp, 0); - break; - } - -done: *lsnp = argp->prev_lsn; - ret = 0; - - if (0) { -err: - if (meta != NULL) - (void)memp_fput(mpf, meta, 0); - } -out: REC_CLOSE; -} - -/* - * __bam_pg_free_recover -- - * Recovery function for pg_free. 
- * - * PUBLIC: int __bam_pg_free_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__bam_pg_free_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __bam_pg_free_args *argp; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_LSN copy_lsn; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, modified, ret; - - COMPQUIET(info, NULL); - REC_PRINT(__bam_pg_free_print); - REC_INTRO(__bam_pg_free_read, 1); - - /* - * Fix up the freed page. If we're redoing the operation we get the - * page and explicitly discard its contents, then update its LSN. If - * we're undoing the operation, we get the page and restore its header. - * Create the page if necessary, we may be freeing an aborted - * create. - */ - if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - modified = 0; - __ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &copy_lsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn); - if (DB_REDO(op) && - (cmp_p == 0 || - (IS_ZERO_LSN(copy_lsn) && - log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) { - /* Need to redo update described. */ - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - pagep->lsn = *lsnp; - - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - memcpy(pagep, argp->header.data, argp->header.size); - - modified = 1; - } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - - /* - * Fix up the metadata page. If we're redoing or undoing the operation - * we get the page and update its LSN and free pointer. - */ - pgno = PGNO_BASE_MD; - if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) { - /* The metadata page must always exist. 
*/ - (void)__db_pgerr(file_dbp, pgno); - goto out; - } - - modified = 0; - cmp_n = log_compare(lsnp, &LSN(meta)); - cmp_p = log_compare(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo the deallocation. */ - meta->free = argp->pgno; - LSN(meta) = *lsnp; - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo the deallocation. */ - meta->free = argp->next; - LSN(meta) = argp->meta_lsn; - modified = 1; - } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} - -/* * __bam_split_recover -- * Recovery function for split. * @@ -320,7 +50,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; db_pgno_t pgno, root_pgno; u_int32_t ptype; - int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret; + int cmp, l_update, p_update, r_update, rc, ret, ret_l, rootsplit, t_ret; COMPQUIET(info, NULL); REC_PRINT(__bam_split_print); @@ -345,16 +75,16 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) * so it's got to be aligned. Copying it into allocated memory is * the only way to guarantee this. */ - if ((ret = __os_malloc(dbenv, argp->pg.size, NULL, &sp)) != 0) + if ((ret = __os_malloc(dbenv, argp->pg.size, &sp)) != 0) goto out; memcpy(sp, argp->pg.data, argp->pg.size); pgno = PGNO(sp); root_pgno = argp->root_pgno; - rootsplit = pgno == root_pgno; - if (memp_fget(mpf, &argp->left, 0, &lp) != 0) + rootsplit = root_pgno != PGNO_INVALID; + if ((ret_l = mpf->get(mpf, &argp->left, 0, &lp)) != 0) lp = NULL; - if (memp_fget(mpf, &argp->right, 0, &rp) != 0) + if (mpf->get(mpf, &argp->right, 0, &rp) != 0) rp = NULL; if (DB_REDO(op)) { @@ -368,8 +98,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) * same reason. 
*/ if (rootsplit) { - if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) { - (void)__db_pgerr(file_dbp, pgno); + if ((ret = mpf->get(mpf, &pgno, 0, &pp)) != 0) { + __db_pgerr(file_dbp, pgno, ret); pp = NULL; goto out; } @@ -377,7 +107,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) CHECK_LSN(op, cmp, &LSN(pp), &LSN(argp->pg.data)); p_update = cmp == 0; } else if (lp == NULL) { - (void)__db_pgerr(file_dbp, argp->left); + __db_pgerr(file_dbp, argp->left, ret_l); goto out; } @@ -400,10 +130,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) goto check_next; /* Allocate and initialize new left/right child pages. */ - if ((ret = - __os_malloc(dbenv, file_dbp->pgsize, NULL, &_lp)) != 0 - || (ret = - __os_malloc(dbenv, file_dbp->pgsize, NULL, &_rp)) != 0) + if ((ret = __os_malloc(dbenv, file_dbp->pgsize, &_lp)) != 0 || + (ret = __os_malloc(dbenv, file_dbp->pgsize, &_rp)) != 0) goto out; if (rootsplit) { P_INIT(_lp, file_dbp->pgsize, argp->left, @@ -431,31 +159,31 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) goto out; /* If the left child is wrong, update it. */ - if (lp == NULL && (ret = - memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) { - (void)__db_pgerr(file_dbp, argp->left); + if (lp == NULL && (ret = mpf->get( + mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) { + __db_pgerr(file_dbp, argp->left, ret); lp = NULL; goto out; } if (l_update) { memcpy(lp, _lp, file_dbp->pgsize); lp->lsn = *lsnp; - if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, lp, DB_MPOOL_DIRTY)) != 0) goto out; lp = NULL; } /* If the right child is wrong, update it. 
*/ - if (rp == NULL && (ret = memp_fget(mpf, - &argp->right, DB_MPOOL_CREATE, &rp)) != 0) { - (void)__db_pgerr(file_dbp, argp->right); + if (rp == NULL && (ret = mpf->get( + mpf, &argp->right, DB_MPOOL_CREATE, &rp)) != 0) { + __db_pgerr(file_dbp, argp->right, ret); rp = NULL; goto out; } if (r_update) { memcpy(rp, _rp, file_dbp->pgsize); rp->lsn = *lsnp; - if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, rp, DB_MPOOL_DIRTY)) != 0) goto out; rp = NULL; } @@ -477,11 +205,11 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info) P_INIT(pp, file_dbp->pgsize, root_pgno, PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype); - RE_NREC_SET(pp, - rc ? __bam_total(_lp) + __bam_total(_rp) : 0); + RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) + + __bam_total(file_dbp, _rp) : 0); pp->lsn = *lsnp; - if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, pp, DB_MPOOL_DIRTY)) != 0) goto out; pp = NULL; } @@ -494,8 +222,8 @@ check_next: /* * page must exist because we're redoing the operation. */ if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) { - if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) { - (void)__db_pgerr(file_dbp, argp->npgno); + if ((ret = mpf->get(mpf, &argp->npgno, 0, &np)) != 0) { + __db_pgerr(file_dbp, argp->npgno, ret); np = NULL; goto out; } @@ -505,7 +233,7 @@ check_next: /* PREV_PGNO(np) = argp->right; np->lsn = *lsnp; if ((ret = - memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0) + mpf->put(mpf, np, DB_MPOOL_DIRTY)) != 0) goto out; np = NULL; } @@ -518,13 +246,13 @@ check_next: /* * the adds onto the page that caused the split, and there's * really no undo-ing to be done. 
*/ - if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) { + if ((ret = mpf->get(mpf, &pgno, 0, &pp)) != 0) { pp = NULL; goto lrundo; } if (log_compare(lsnp, &LSN(pp)) == 0) { memcpy(pp, argp->pg.data, argp->pg.size); - if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->put(mpf, pp, DB_MPOOL_DIRTY)) != 0) goto out; pp = NULL; } @@ -542,7 +270,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { log_compare(lsnp, &LSN(lp)) == 0) { lp->lsn = argp->llsn; if ((ret = - memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0) + mpf->put(mpf, lp, DB_MPOOL_DIRTY)) != 0) goto out; lp = NULL; } @@ -550,7 +278,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { log_compare(lsnp, &LSN(rp)) == 0) { rp->lsn = argp->rlsn; if ((ret = - memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0) + mpf->put(mpf, rp, DB_MPOOL_DIRTY)) != 0) goto out; rp = NULL; } @@ -565,14 +293,14 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { * if there's nothing to undo. */ if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) { - if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) { + if ((ret = mpf->get(mpf, &argp->npgno, 0, &np)) != 0) { np = NULL; goto done; } if (log_compare(lsnp, &LSN(np)) == 0) { PREV_PGNO(np) = argp->left; np->lsn = argp->nlsn; - if (memp_fput(mpf, np, DB_MPOOL_DIRTY)) + if (mpf->put(mpf, np, DB_MPOOL_DIRTY)) goto out; np = NULL; } @@ -583,22 +311,22 @@ done: *lsnp = argp->prev_lsn; ret = 0; out: /* Free any pages that weren't dirtied. 
*/ - if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0) + if (pp != NULL && (t_ret = mpf->put(mpf, pp, 0)) != 0 && ret == 0) ret = t_ret; - if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0) + if (lp != NULL && (t_ret = mpf->put(mpf, lp, 0)) != 0 && ret == 0) ret = t_ret; - if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0) + if (np != NULL && (t_ret = mpf->put(mpf, np, 0)) != 0 && ret == 0) ret = t_ret; - if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0) + if (rp != NULL && (t_ret = mpf->put(mpf, rp, 0)) != 0 && ret == 0) ret = t_ret; /* Free any allocated space. */ if (_lp != NULL) - __os_free(_lp, file_dbp->pgsize); + __os_free(dbenv, _lp); if (_rp != NULL) - __os_free(_rp, file_dbp->pgsize); + __os_free(dbenv, _rp); if (sp != NULL) - __os_free(sp, argp->pg.size); + __os_free(dbenv, sp); REC_CLOSE; } @@ -627,23 +355,24 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info) db_pgno_t pgno, root_pgno; int cmp_n, cmp_p, modified, ret; + pagep = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_rsplit_print); REC_INTRO(__bam_rsplit_read, 1); /* Fix the root page. */ pgno = root_pgno = argp->root_pgno; - if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) != 0) { /* The root page must always exist if we are going forward. */ if (DB_REDO(op)) { - __db_pgerr(file_dbp, pgno); + __db_pgerr(file_dbp, pgno, ret); goto out; } /* This must be the root of an OPD tree. */ DB_ASSERT(root_pgno != ((BTREE *)file_dbp->bt_internal)->bt_root); ret = 0; - goto done; + goto do_page; } modified = 0; cmp_n = log_compare(lsnp, &LSN(pagep)); @@ -666,22 +395,23 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info) pagep->lsn = argp->rootlsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; +do_page: /* * Fix the page copied over the root page. 
It's possible that the * page never made it to disk, so if we're undo-ing and the page * doesn't exist, it's okay and there's nothing further to do. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) goto done; - (void)__db_pgerr(file_dbp, argp->pgno); + __db_pgerr(file_dbp, argp->pgno, ret); goto out; } modified = 0; - __ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN)); + (void)__ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN)); cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &copy_lsn); CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn); @@ -694,13 +424,16 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info) memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); + REC_CLOSE; } /* @@ -725,15 +458,16 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info) PAGE *pagep; int cmp_n, cmp_p, modified, ret; + pagep = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_adj_print); REC_INTRO(__bam_adj_read, 1); /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) goto done; - (void)__db_pgerr(file_dbp, argp->pgno); + __db_pgerr(file_dbp, argp->pgno, ret); goto out; } @@ -745,7 +479,7 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info) /* Need to redo update described. */ if ((ret = __bam_adjindx(dbc, pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0) - goto err; + goto out; LSN(pagep) = *lsnp; modified = 1; @@ -753,21 +487,21 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info) /* Need to undo update described. 
*/ if ((ret = __bam_adjindx(dbc, pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0) - goto err; + goto out; LSN(pagep) = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; - if (0) { -err: (void)memp_fput(mpf, pagep, 0); - } -out: REC_CLOSE; +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); + REC_CLOSE; } /* @@ -793,15 +527,16 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info) PAGE *pagep; int cmp_n, cmp_p, modified, ret; + pagep = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_cadjust_print); REC_INTRO(__bam_cadjust_read, 1); /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) goto done; - (void)__db_pgerr(file_dbp, argp->pgno); + __db_pgerr(file_dbp, argp->pgno, ret); goto out; } @@ -812,11 +547,13 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info) if (cmp_p == 0 && DB_REDO(op)) { /* Need to redo update described. */ if (IS_BTREE_PAGE(pagep)) { - GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust; + GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs += + argp->adjust; if (argp->opflags & CAD_UPDATEROOT) RE_NREC_ADJ(pagep, argp->adjust); } else { - GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust; + GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs += + argp->adjust; if (argp->opflags & CAD_UPDATEROOT) RE_NREC_ADJ(pagep, argp->adjust); } @@ -826,24 +563,29 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info) } else if (cmp_n == 0 && DB_UNDO(op)) { /* Need to undo update described. 
*/ if (IS_BTREE_PAGE(pagep)) { - GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust; + GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= + argp->adjust; if (argp->opflags & CAD_UPDATEROOT) RE_NREC_ADJ(pagep, -(argp->adjust)); } else { - GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust; + GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= + argp->adjust; if (argp->opflags & CAD_UPDATEROOT) RE_NREC_ADJ(pagep, -(argp->adjust)); } LSN(pagep) = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); + REC_CLOSE; } /* @@ -869,15 +611,16 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info) u_int32_t indx; int cmp_n, cmp_p, modified, ret; + pagep = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_cdel_print); REC_INTRO(__bam_cdel_read, 1); /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) goto done; - (void)__db_pgerr(file_dbp, argp->pgno); + __db_pgerr(file_dbp, argp->pgno, ret); goto out; } @@ -888,27 +631,30 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info) if (cmp_p == 0 && DB_REDO(op)) { /* Need to redo update described. */ indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0); - B_DSET(GET_BKEYDATA(pagep, indx)->type); + B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type); LSN(pagep) = *lsnp; modified = 1; } else if (cmp_n == 0 && DB_UNDO(op)) { /* Need to undo update described. */ indx = argp->indx + (TYPE(pagep) == P_LBTREE ? 
O_INDX : 0); - B_DCLR(GET_BKEYDATA(pagep, indx)->type); + B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type); (void)__bam_ca_delete(file_dbp, argp->pgno, argp->indx, 0); LSN(pagep) = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); + REC_CLOSE; } /* @@ -936,18 +682,19 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info) int cmp_n, cmp_p, modified, ret; u_int8_t *p; + pagep = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_repl_print); REC_INTRO(__bam_repl_read, 1); /* Get the page; if it never existed and we're undoing, we're done. */ - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { if (DB_UNDO(op)) goto done; - (void)__db_pgerr(file_dbp, argp->pgno); + __db_pgerr(file_dbp, argp->pgno, ret); goto out; } - bk = GET_BKEYDATA(pagep, argp->indx); + bk = GET_BKEYDATA(file_dbp, pagep, argp->indx); modified = 0; cmp_n = log_compare(lsnp, &LSN(pagep)); @@ -961,8 +708,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info) */ memset(&dbt, 0, sizeof(dbt)); dbt.size = argp->prefix + argp->suffix + argp->repl.size; - if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0) - goto err; + if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0) + goto out; p = dbt.data; memcpy(p, bk->data, argp->prefix); p += argp->prefix; @@ -971,9 +718,9 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info) memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); - __os_free(dbt.data, dbt.size); + __os_free(dbenv, dbt.data); if (ret != 0) - goto err; + goto out; LSN(pagep) = *lsnp; modified = 1; @@ -985,8 +732,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info) */ memset(&dbt, 0, sizeof(dbt)); dbt.size = 
argp->prefix + argp->suffix + argp->orig.size; - if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0) - goto err; + if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0) + goto out; p = dbt.data; memcpy(p, bk->data, argp->prefix); p += argp->prefix; @@ -995,27 +742,27 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info) memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); - __os_free(dbt.data, dbt.size); + __os_free(dbenv, dbt.data); if (ret != 0) - goto err; + goto out; /* Reset the deleted flag, if necessary. */ if (argp->isdeleted) - B_DSET(GET_BKEYDATA(pagep, argp->indx)->type); + B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type); LSN(pagep) = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; + pagep = NULL; done: *lsnp = argp->prev_lsn; ret = 0; - if (0) { -err: (void)memp_fput(mpf, pagep, 0); - } -out: REC_CLOSE; +out: if (pagep != NULL) + (void)mpf->put(mpf, pagep, 0); + REC_CLOSE; } /* @@ -1040,14 +787,15 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info) DB_MPOOLFILE *mpf; int cmp_n, cmp_p, modified, ret; + meta = NULL; COMPQUIET(info, NULL); REC_PRINT(__bam_root_print); REC_INTRO(__bam_root_read, 0); - if ((ret = memp_fget(mpf, &argp->meta_pgno, 0, &meta)) != 0) { + if ((ret = mpf->get(mpf, &argp->meta_pgno, 0, &meta)) != 0) { /* The metadata page must always exist on redo. */ if (DB_REDO(op)) { - (void)__db_pgerr(file_dbp, argp->meta_pgno); + __db_pgerr(file_dbp, argp->meta_pgno, ret); goto out; } else goto done; @@ -1068,13 +816,16 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info) meta->dbmeta.lsn = argp->meta_lsn; modified = 1; } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) + if ((ret = mpf->put(mpf, meta, modified ? 
DB_MPOOL_DIRTY : 0)) != 0) goto out; + meta = NULL; done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; +out: if (meta != NULL) + (void)mpf->put(mpf, meta, 0); + REC_CLOSE; } /* @@ -1116,7 +867,7 @@ __bam_curadj_recover(dbenv, dbtp, lsnp, op, info) break; case DB_CA_DUP: if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx, - argp->from_pgno, argp->from_indx, argp->to_indx)) != 0) + argp->from_pgno, argp->from_indx, argp->to_indx)) != 0) goto out; break; @@ -1181,7 +932,8 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info) * this function know anything about how offpage dups work. */ if ((ret = - __db_icursor(file_dbp, NULL, DB_RECNO, argp->root, 0, &rdbc)) != 0) + __db_icursor(file_dbp, + NULL, DB_RECNO, argp->root, 0, DB_LOCK_INVALIDID, &rdbc)) != 0) goto out; cp = (BTREE_CURSOR *)rdbc->internal; diff --git a/bdb/btree/bt_reclaim.c b/bdb/btree/bt_reclaim.c index 538d837c2d2..ae4554ea7d6 100644 --- a/bdb/btree/bt_reclaim.c +++ b/bdb/btree/bt_reclaim.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1998, 1999, 2000 + * Copyright (c) 1998-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell Exp $"; +static const char revid[] = "$Id: bt_reclaim.c,v 11.11 2002/03/29 20:46:26 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,10 +18,8 @@ static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" /* * __bam_reclaim -- @@ -51,3 +49,38 @@ __bam_reclaim(dbp, txn) return (ret); } + +/* + * __bam_truncate -- + * Truncate a database. 
+ * + * PUBLIC: int __bam_truncate __P((DB *, DB_TXN *, u_int32_t *)); + */ +int +__bam_truncate(dbp, txn, countp) + DB *dbp; + DB_TXN *txn; + u_int32_t *countp; +{ + DBC *dbc; + db_trunc_param trunc; + int ret, t_ret; + + /* Acquire a cursor. */ + if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) + return (ret); + + trunc.count = 0; + trunc.dbc = dbc; + /* Walk the tree, freeing pages. */ + ret = __bam_traverse(dbc, + DB_LOCK_WRITE, dbc->internal->root, __db_truncate_callback, &trunc); + + /* Discard the cursor. */ + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + *countp = trunc.count; + + return (ret); +} diff --git a/bdb/btree/bt_recno.c b/bdb/btree/bt_recno.c index 6ac0cac350d..fab684f3a5f 100644 --- a/bdb/btree/bt_recno.c +++ b/bdb/btree/bt_recno.c @@ -1,36 +1,31 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997, 1998, 1999, 2000 + * Copyright (c) 1997-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_recno.c,v 11.65 2001/01/18 14:33:22 bostic Exp $"; +static const char revid[] = "$Id: bt_recno.c,v 11.106 2002/08/16 04:56:30 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <limits.h> +#include <stdio.h> #include <string.h> #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "db_ext.h" -#include "db_shash.h" -#include "lock.h" -#include "lock_ext.h" -#include "qam.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t)); -static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); -static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); static int __ram_source __P((DB *)); static int __ram_sread __P((DBC *, db_recno_t)); static int __ram_update __P((DBC *, db_recno_t, int)); @@ -90,17 +85,32 @@ 
static int __ram_update __P((DBC *, db_recno_t, int)); * Do we need to log the current cursor adjustment? */ #define CURADJ_LOG(dbc) \ - (DB_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL) + (DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL) + +/* + * After a search, copy the found page into the cursor, discarding any + * currently held lock. + */ +#define STACK_TO_CURSOR(cp) { \ + (cp)->page = (cp)->csp->page; \ + (cp)->pgno = (cp)->csp->page->pgno; \ + (cp)->indx = (cp)->csp->indx; \ + (void)__TLPUT(dbc, (cp)->lock); \ + (cp)->lock = (cp)->csp->lock; \ + (cp)->lock_mode = (cp)->csp->lock_mode; \ +} /* * __ram_open -- * Recno open function. * - * PUBLIC: int __ram_open __P((DB *, const char *, db_pgno_t, u_int32_t)); + * PUBLIC: int __ram_open __P((DB *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t)); */ int -__ram_open(dbp, name, base_pgno, flags) +__ram_open(dbp, txn, name, base_pgno, flags) DB *dbp; + DB_TXN *txn; const char *name; db_pgno_t base_pgno; u_int32_t flags; @@ -109,15 +119,14 @@ __ram_open(dbp, name, base_pgno, flags) DBC *dbc; int ret, t_ret; + COMPQUIET(name, NULL); t = dbp->bt_internal; /* Initialize the remaining fields/methods of the DB. */ - dbp->del = __ram_delete; - dbp->put = __ram_put; dbp->stat = __bam_stat; /* Start up the tree. */ - if ((ret = __bam_read_root(dbp, name, base_pgno, flags)) != 0) + if ((ret = __bam_read_root(dbp, txn, base_pgno, flags)) != 0) return (ret); /* @@ -132,7 +141,7 @@ __ram_open(dbp, name, base_pgno, flags) return (ret); /* If we're snapshotting an underlying source file, do it now. */ - if (F_ISSET(dbp, DB_RE_SNAPSHOT)) { + if (F_ISSET(dbp, DB_AM_SNAPSHOT)) { /* Allocate a cursor. */ if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) return (ret); @@ -147,104 +156,38 @@ __ram_open(dbp, name, base_pgno, flags) ret = t_ret; } - return (0); -} - -/* - * __ram_delete -- - * Recno db->del function. 
- */ -static int -__ram_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - BTREE_CURSOR *cp; - DBC *dbc; - db_recno_t recno; - int ret, t_ret; - - PANIC_CHECK(dbp->dbenv); - - /* Check for invalid flags. */ - if ((ret = __db_delchk(dbp, - key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - /* Acquire a cursor. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags); - - /* Check the user's record number and fill in as necessary. */ - if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) - goto err; - - /* Do the delete. */ - cp = (BTREE_CURSOR *)dbc->internal; - cp->recno = recno; - - ret = __ram_c_del(dbc); - - /* Release the cursor. */ -err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); } /* - * __ram_put -- - * Recno db->put function. + * __ram_append -- + * Recno append function. + * + * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *)); */ -static int -__ram_put(dbp, txn, key, data, flags) - DB *dbp; - DB_TXN *txn; +int +__ram_append(dbc, key, data) + DBC *dbc; DBT *key, *data; - u_int32_t flags; { - DBC *dbc; - db_recno_t recno; - int ret, t_ret; - - PANIC_CHECK(dbp->dbenv); - - /* Check for invalid flags. */ - if ((ret = __db_putchk(dbp, - key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0) - return (ret); - - /* Allocate a cursor. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); + BTREE_CURSOR *cp; + int ret; - DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags); + cp = (BTREE_CURSOR *)dbc->internal; /* - * If we're appending to the tree, make sure we've read in all of - * the backing source file. Otherwise, check the user's record - * number and fill in as necessary. If we found the record or it - * simply didn't exist, add the user's record. + * Make sure we've read in all of the backing source file. 
If + * we found the record or it simply didn't exist, add the + * user's record. */ - if (flags == DB_APPEND) - ret = __ram_update(dbc, DB_MAX_RECORDS, 0); - else - ret = __ram_getno(dbc, key, &recno, 1); + ret = __ram_update(dbc, DB_MAX_RECORDS, 0); if (ret == 0 || ret == DB_NOTFOUND) - ret = __ram_add(dbc, &recno, data, flags, 0); - - /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; + ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0); - /* Return the record number if we're appending to the tree. */ - if (ret == 0 && flags == DB_APPEND) - ret = __db_retcopy(dbp, key, &recno, sizeof(recno), - &dbc->rkey.data, &dbc->rkey.ulen); + /* Return the record number. */ + if (ret == 0) + ret = __db_retcopy(dbc->dbp->dbenv, key, &cp->recno, + sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen); return (ret); } @@ -295,9 +238,9 @@ __ram_c_del(dbc) goto err; } stack = 1; - cp->page = cp->csp->page; - cp->pgno = cp->csp->page->pgno; - cp->indx = cp->csp->indx; + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp); /* * If re-numbering records, the on-page deleted flag can only mean @@ -310,7 +253,7 @@ __ram_c_del(dbc) * delete records they never created, the latter is an error because * if the record was "deleted", we could never have found it. */ - if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type)) { + if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) { ret = DB_KEYEMPTY; goto err; } @@ -321,9 +264,8 @@ __ram_c_del(dbc) goto err; __bam_adjust(dbc, -1); if (__ram_ca(dbc, CA_DELETE) > 0 && - CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv, - dbc->txn, &lsn, 0, dbp->log_fileid, CA_DELETE, - cp->root, cp->recno, cp->order)) != 0) + CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, dbc->txn, + &lsn, 0, CA_DELETE, cp->root, cp->recno, cp->order)) != 0) goto err; /* @@ -346,15 +288,15 @@ __ram_c_del(dbc) * going to be emptied by removing the single reference * to the emptied page (or one of its parents). 
*/ - for (epg = cp->sp; epg <= cp->csp; ++epg) - if (NUM_ENT(epg->page) <= 1) + for (epg = cp->csp; epg >= cp->sp; --epg) + if (NUM_ENT(epg->page) > 1) break; /* * We want to delete a single item out of the last page - * that we're not deleting, back up to that page. + * that we're not deleting. */ - ret = __bam_dpages(dbc, --epg); + ret = __bam_dpages(dbc, epg); /* * Regardless of the return from __bam_dpages, it will @@ -412,6 +354,7 @@ __ram_c_get(dbc, key, data, flags, pgnop) dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; + LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY); retry: switch (flags) { case DB_CURRENT: /* @@ -504,6 +447,7 @@ retry: switch (flags) { goto err; /* NOTREACHED */ case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: /* * If we're searching a set of off-page dups, we start * a new linear search from the first record. Otherwise, @@ -531,6 +475,8 @@ retry: switch (flags) { * read from the backing source file. Do it now for DB_CURRENT (if * the current record was deleted we may need more records from the * backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT. + * (We don't have to test for flags == DB_FIRST, because the switch + * statement above re-set flags to DB_NEXT in that case.) */ if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret = __ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND) @@ -547,16 +493,8 @@ retry: switch (flags) { goto err; } - /* - * Copy the page into the cursor, discarding any lock we - * are currently holding. - */ - cp->page = cp->csp->page; - cp->pgno = cp->csp->page->pgno; - cp->indx = cp->csp->indx; - (void)__TLPUT(dbc, cp->lock); - cp->lock = cp->csp->lock; - cp->lock_mode = cp->csp->lock_mode; + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp); /* * If re-numbering records, the on-page deleted flag means this @@ -567,21 +505,34 @@ retry: switch (flags) { * walking through off-page duplicates, and fail if they were * requested explicitly by the application. 
*/ - if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type)) + if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) switch (flags) { case DB_NEXT: case DB_PREV: (void)__bam_stkrel(dbc, STK_CLRDBC); goto retry; case DB_GET_BOTH: - (void)__bam_stkrel(dbc, STK_CLRDBC); - continue; + case DB_GET_BOTH_RANGE: + /* + * If we're an OPD tree, we don't care about + * matching a record number on a DB_GET_BOTH + * -- everything belongs to the same tree. A + * normal recno should give up and return + * DB_NOTFOUND if the matching recno is deleted. + */ + if (F_ISSET(dbc, DBC_OPD)) { + (void)__bam_stkrel(dbc, STK_CLRDBC); + continue; + } + ret = DB_NOTFOUND; + goto err; default: ret = DB_KEYEMPTY; goto err; } - if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) { + if (flags == DB_GET_BOTH || + flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { if ((ret = __bam_cmp(dbp, data, cp->page, cp->indx, __bam_defcmp, &cmp)) != 0) return (ret); @@ -598,10 +549,11 @@ retry: switch (flags) { /* Return the key if the user didn't give us one. */ if (!F_ISSET(dbc, DBC_OPD)) { - if (flags != DB_SET && flags != DB_SET_RANGE) - ret = __db_retcopy(dbp, - key, &cp->recno, sizeof(cp->recno), - &dbc->rkey.data, &dbc->rkey.ulen); + if (flags != DB_GET_BOTH && flags != DB_GET_BOTH_RANGE && + flags != DB_SET && flags != DB_SET_RANGE) + ret = __db_retcopy(dbp->dbenv, + key, &cp->recno, sizeof(cp->recno), + &dbc->rkey->data, &dbc->rkey->ulen); F_SET(key, DB_DBT_ISSET); } @@ -637,23 +589,43 @@ __ram_c_put(dbc, key, data, flags, pgnop) cp = (BTREE_CURSOR *)dbc->internal; /* - * DB_KEYFIRST and DB_KEYLAST will only be set if we're dealing with - * an off-page duplicate tree, they can't be specified at user level. - * Translate them into something else. + * DB_KEYFIRST and DB_KEYLAST mean different things if they're + * used in an off-page duplicate tree. 
If we're an off-page + * duplicate tree, they really mean "put at the beginning of the + * tree" and "put at the end of the tree" respectively, so translate + * them to something else. */ - switch (flags) { - case DB_KEYFIRST: - cp->recno = 1; - flags = DB_BEFORE; - break; - case DB_KEYLAST: - if ((ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0)) != 0) - return (ret); - if (CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv, - dbc->txn, &lsn, 0, dbp->log_fileid, CA_ICURRENT, - cp->root, cp->recno, cp->order))) - return (ret); - return (0); + if (F_ISSET(dbc, DBC_OPD)) + switch (flags) { + case DB_KEYFIRST: + cp->recno = 1; + flags = DB_BEFORE; + break; + case DB_KEYLAST: + if ((ret = __ram_add(dbc, + &cp->recno, data, DB_APPEND, 0)) != 0) + return (ret); + if (CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, + CA_ICURRENT, cp->root, cp->recno, cp->order))) + return (ret); + return (0); + } + + /* + * Handle normal DB_KEYFIRST/DB_KEYLAST; for a recno, which has + * no duplicates, these are identical and mean "put the given + * datum at the given recno". + * + * Note that the code here used to be in __ram_put; now, we + * go through the access-method-common __db_put function, which + * handles DB_NOOVERWRITE, so we and __ram_add don't have to. + */ + if (flags == DB_KEYFIRST || flags == DB_KEYLAST) { + ret = __ram_getno(dbc, key, &cp->recno, 1); + if (ret == 0 || ret == DB_NOTFOUND) + ret = __ram_add(dbc, &cp->recno, data, 0, 0); + return (ret); } /* @@ -677,9 +649,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) */ DB_ASSERT(exact || CD_ISSET(cp)); - cp->page = cp->csp->page; - cp->pgno = cp->csp->page->pgno; - cp->indx = cp->csp->indx; + /* Copy the page into the cursor. 
*/ + STACK_TO_CURSOR(cp); ret = __bam_iitem(dbc, key, data, iiflags, 0); t_ret = __bam_stkrel(dbc, STK_CLRDBC); @@ -688,7 +659,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) ret = t_ret; else if (ret == DB_NEEDSPLIT) { arg = &cp->recno; - if ((ret = __bam_split(dbc, arg)) != 0) + if ((ret = __bam_split(dbc, arg, NULL)) != 0) goto err; goto split; } @@ -709,8 +680,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) /* Only log if __ram_ca found any relevant cursors. */ if (nc > 0 && CURADJ_LOG(dbc) && - (ret = __bam_rcuradj_log(dbp->dbenv, - dbc->txn, &lsn, 0, dbp->log_fileid, CA_IAFTER, + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER, cp->root, cp->recno, cp->order)) != 0) goto err; break; @@ -720,8 +690,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) /* Only log if __ram_ca found any relevant cursors. */ if (nc > 0 && CURADJ_LOG(dbc) && - (ret = __bam_rcuradj_log(dbp->dbenv, - dbc->txn, &lsn, 0, dbp->log_fileid, CA_IBEFORE, + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE, cp->root, cp->recno, cp->order)) != 0) goto err; break; @@ -734,8 +703,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) * Only log if __ram_ca found any relevant cursors. */ if (CD_ISSET(cp) && __ram_ca(dbc, CA_ICURRENT) > 0 && - CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log( - dbp->dbenv, dbc->txn, &lsn, 0, dbp->log_fileid, + CURADJ_LOG(dbc) && + (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0) goto err; break; @@ -743,8 +712,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) /* Return the key if we've created a new record. 
*/ if (!F_ISSET(dbc, DBC_OPD) && (flags == DB_AFTER || flags == DB_BEFORE)) - ret = __db_retcopy(dbp, key, &cp->recno, - sizeof(cp->recno), &dbc->rkey.data, &dbc->rkey.ulen); + ret = __db_retcopy(dbp->dbenv, key, &cp->recno, + sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen); /* The cursor was reset, no further delete adjustment is necessary. */ err: CD_CLR(cp); @@ -940,13 +909,12 @@ __ram_update(dbc, recno, can_create) int can_create; { BTREE *t; - BTREE_CURSOR *cp; DB *dbp; + DBT *rdata; db_recno_t nrecs; int ret; dbp = dbc->dbp; - cp = (BTREE_CURSOR *)dbc->internal; t = dbp->bt_internal; /* @@ -976,27 +944,13 @@ __ram_update(dbc, recno, can_create) if (!can_create || recno <= nrecs + 1) return (0); - dbc->rdata.dlen = 0; - dbc->rdata.doff = 0; - dbc->rdata.flags = 0; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if (dbc->rdata.ulen < t->re_len) { - if ((ret = __os_realloc(dbp->dbenv, - t->re_len, NULL, &dbc->rdata.data)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; - return (ret); - } - dbc->rdata.ulen = t->re_len; - } - dbc->rdata.size = t->re_len; - memset(dbc->rdata.data, t->re_pad, t->re_len); - } else - dbc->rdata.size = 0; + rdata = &dbc->my_rdata; + rdata->flags = 0; + rdata->size = 0; while (recno > ++nrecs) if ((ret = __ram_add(dbc, - &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0) + &nrecs, rdata, 0, BI_DELETED)) != 0) return (ret); return (0); } @@ -1017,9 +971,9 @@ __ram_source(dbp) /* Find the real name, and swap out the one we had before. */ if ((ret = __db_appname(dbp->dbenv, - DB_APP_DATA, NULL, t->re_source, 0, NULL, &source)) != 0) + DB_APP_DATA, t->re_source, 0, NULL, &source)) != 0) return (ret); - __os_freestr(t->re_source); + __os_free(dbp->dbenv, t->re_source); t->re_source = source; /* @@ -1060,6 +1014,7 @@ __ram_writeback(dbp) t = dbp->bt_internal; dbenv = dbp->dbenv; fp = NULL; + pad = NULL; /* If the file wasn't modified, we're done. 
*/ if (!t->re_modified) @@ -1119,40 +1074,45 @@ __ram_writeback(dbp) /* * We step through the records, writing each one out. Use the record * number and the dbp->get() function, instead of a cursor, so we find - * and write out "deleted" or non-existent records. + * and write out "deleted" or non-existent records. The DB handle may + * be threaded, so allocate memory as we go. */ memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); key.size = sizeof(db_recno_t); key.data = &keyno; + memset(&data, 0, sizeof(data)); + F_SET(&data, DB_DBT_REALLOC); /* * We'll need the delimiter if we're doing variable-length records, * and the pad character if we're doing fixed-length records. */ delim = t->re_delim; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((ret = __os_malloc(dbenv, t->re_len, NULL, &pad)) != 0) + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) { + if ((ret = __os_malloc(dbenv, t->re_len, &pad)) != 0) goto err; memset(pad, t->re_pad, t->re_len); - } else - COMPQUIET(pad, NULL); + } for (keyno = 1;; ++keyno) { switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) { case 0: - if (fwrite(data.data, 1, data.size, fp) != data.size) + if (data.size != 0 && (u_int32_t)fwrite( + data.data, 1, data.size, fp) != data.size) goto write_err; break; case DB_KEYEMPTY: - if (F_ISSET(dbp, DB_RE_FIXEDLEN) && - fwrite(pad, 1, t->re_len, fp) != t->re_len) + if (F_ISSET(dbp, DB_AM_FIXEDLEN) && + (u_int32_t)fwrite(pad, 1, t->re_len, fp) != + t->re_len) goto write_err; break; case DB_NOTFOUND: ret = 0; goto done; + default: + goto err; } - if (!F_ISSET(dbp, DB_RE_FIXEDLEN) && + if (!F_ISSET(dbp, DB_AM_FIXEDLEN) && fwrite(&delim, 1, 1, fp) != 1) { write_err: ret = errno; __db_err(dbp->dbenv, @@ -1174,6 +1134,12 @@ done: /* Close the file descriptor. */ if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) ret = t_ret; + /* Discard memory allocated to hold the data items. 
*/ + if (data.data != NULL) + __os_ufree(dbenv, data.data); + if (pad != NULL) + __os_free(dbenv, pad); + if (ret == 0) t->re_modified = 0; @@ -1191,7 +1157,7 @@ __ram_sread(dbc, top) { BTREE *t; DB *dbp; - DBT data; + DBT data, *rdata; db_recno_t recno; size_t len; int ch, ret, was_modified; @@ -1203,45 +1169,56 @@ __ram_sread(dbc, top) if ((ret = __bam_nrecs(dbc, &recno)) != 0) return (ret); - /* Use the record data return memory, it's only a short-term use. */ - len = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : 256; - if (dbc->rdata.ulen < len) { + /* + * Use the record key return memory, it's only a short-term use. + * The record data return memory is used by __bam_iitem, which + * we'll indirectly call, so use the key so as not to collide. + */ + len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256; + rdata = &dbc->my_rkey; + if (rdata->ulen < len) { if ((ret = __os_realloc( - dbp->dbenv, len, NULL, &dbc->rdata.data)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = NULL; + dbp->dbenv, len, &rdata->data)) != 0) { + rdata->ulen = 0; + rdata->data = NULL; return (ret); } - dbc->rdata.ulen = len; + rdata->ulen = (u_int32_t)len; } memset(&data, 0, sizeof(data)); while (recno < top) { - data.data = dbc->rdata.data; + data.data = rdata->data; data.size = 0; - if (F_ISSET(dbp, DB_RE_FIXEDLEN)) + if (F_ISSET(dbp, DB_AM_FIXEDLEN)) for (len = t->re_len; len > 0; --len) { - if ((ch = getc(t->re_fp)) == EOF) - goto eof; + if ((ch = getc(t->re_fp)) == EOF) { + if (data.size == 0) + goto eof; + break; + } ((u_int8_t *)data.data)[data.size++] = ch; } else for (;;) { - if ((ch = getc(t->re_fp)) == EOF) - goto eof; + if ((ch = getc(t->re_fp)) == EOF) { + if (data.size == 0) + goto eof; + break; + } if (ch == t->re_delim) break; ((u_int8_t *)data.data)[data.size++] = ch; - if (data.size == dbc->rdata.ulen) { + if (data.size == rdata->ulen) { if ((ret = __os_realloc(dbp->dbenv, - dbc->rdata.ulen *= 2, - NULL, &dbc->rdata.data)) != 0) { - dbc->rdata.ulen = 0; - dbc->rdata.data = 
NULL; + rdata->ulen *= 2, + &rdata->data)) != 0) { + rdata->ulen = 0; + rdata->data = NULL; return (ret); } else - data.data = dbc->rdata.data; + data.data = rdata->data; } } @@ -1281,7 +1258,6 @@ __ram_add(dbc, recnop, data, flags, bi_flags) DBT *data; u_int32_t flags, bi_flags; { - BKEYDATA *bk; BTREE_CURSOR *cp; int exact, ret, stack; @@ -1292,9 +1268,9 @@ retry: /* Find the slot for insertion. */ S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0) return (ret); stack = 1; - cp->page = cp->csp->page; - cp->pgno = cp->csp->page->pgno; - cp->indx = cp->csp->indx; + + /* Copy the page into the cursor. */ + STACK_TO_CURSOR(cp); /* * The application may modify the data based on the selected record @@ -1305,24 +1281,6 @@ retry: /* Find the slot for insertion. */ goto err; /* - * If re-numbering records, the on-page deleted flag means this record - * was implicitly created. If not re-numbering records, the on-page - * deleted flag means this record was implicitly created, or, it was - * deleted at some time. - * - * If DB_NOOVERWRITE is set and the item already exists in the tree, - * return an error unless the item was either marked for deletion or - * only implicitly created. - */ - if (exact) { - bk = GET_BKEYDATA(cp->page, cp->indx); - if (!B_DISSET(bk->type) && flags == DB_NOOVERWRITE) { - ret = DB_KEYEXIST; - goto err; - } - } - - /* * Select the arguments for __bam_iitem() and do the insert. If the * key is an exact match, or we're replacing the data item with a * new data item, replace the current item. If the key isn't an exact @@ -1353,7 +1311,7 @@ retry: /* Find the slot for insertion. 
*/ (void)__bam_stkrel(dbc, STK_CLRDBC); stack = 0; - if ((ret = __bam_split(dbc, recnop)) != 0) + if ((ret = __bam_split(dbc, recnop, NULL)) != 0) goto err; goto retry; diff --git a/bdb/btree/bt_rsearch.c b/bdb/btree/bt_rsearch.c index 7102cd715aa..a75181b44e2 100644 --- a/bdb/btree/bt_rsearch.c +++ b/bdb/btree/bt_rsearch.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -40,7 +40,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell Exp $"; +static const char revid[] = "$Id: bt_rsearch.c,v 11.34 2002/07/03 19:03:50 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -48,10 +48,10 @@ static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "btree.h" -#include "db_shash.h" -#include "lock.h" +#include "dbinc/db_page.h" +#include "dbinc/btree.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" /* * __bam_rsearch -- @@ -70,6 +70,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) BTREE_CURSOR *cp; DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; PAGE *h; RINTERNAL *ri; db_indx_t adjust, deloffset, indx, top; @@ -79,6 +80,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) int ret, stack; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; BT_STK_CLR(cp); @@ -99,11 +101,11 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) * Retrieve the root page. */ pg = cp->root; - stack = LF_ISSET(S_STACK); + stack = LF_ISSET(S_STACK) ? 1 : 0; lock_mode = stack ? 
DB_LOCK_WRITE : DB_LOCK_READ; if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) { /* Did not read it, so we can release the lock */ (void)__LPUT(dbc, lock); return (ret); @@ -120,12 +122,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) if (!stack && ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__LPUT(dbc, lock); lock_mode = DB_LOCK_WRITE; if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) { /* Did not read it, so we can release the lock */ (void)__LPUT(dbc, lock); return (ret); @@ -164,7 +166,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) * eliminate any concurrency. A possible fix * would be to lock the last leaf page instead. 
*/ - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__TLPUT(dbc, lock); return (DB_NOTFOUND); } @@ -202,8 +204,8 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) goto err; } } - if (!B_DISSET( - GET_BKEYDATA(h, indx + deloffset)->type) && + if (!B_DISSET(GET_BKEYDATA(dbp, h, + indx + deloffset)->type) && ++t_recno == recno) break; } @@ -216,7 +218,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) return (0); case P_IBTREE: for (indx = 0, top = NUM_ENT(h);;) { - bi = GET_BINTERNAL(h, indx); + bi = GET_BINTERNAL(dbp, h, indx); if (++indx == top || total + bi->nrecs >= recno) break; total += bi->nrecs; @@ -235,7 +237,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) return (0); case P_IRECNO: for (indx = 0, top = NUM_ENT(h);;) { - ri = GET_RINTERNAL(h, indx); + ri = GET_RINTERNAL(dbp, h, indx); if (++indx == top || total + ri->nrecs >= recno) break; total += ri->nrecs; @@ -243,7 +245,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) pg = ri->pgno; break; default: - return (__db_pgfmt(dbp, h->pgno)); + return (__db_pgfmt(dbp->dbenv, h->pgno)); } --indx; @@ -276,12 +278,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) (h->level - 1) == LEAFLEVEL) stack = 1; - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); lock_mode = stack && LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ; if ((ret = __db_lget(dbc, - LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) { + LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) { /* * If we fail, discard the lock we held. 
This * is OK because this only happens when we are @@ -292,7 +294,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp) } } - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) goto err; } /* NOTREACHED */ @@ -315,12 +317,14 @@ __bam_adjust(dbc, adjust) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; EPG *epg; PAGE *h; db_pgno_t root_pgno; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; root_pgno = cp->root; @@ -328,22 +332,27 @@ __bam_adjust(dbc, adjust) for (epg = cp->sp; epg <= cp->csp; ++epg) { h = epg->page; if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) { - if (DB_LOGGING(dbc) && - (ret = __bam_cadjust_log(dbp->dbenv, - dbc->txn, &LSN(h), 0, dbp->log_fileid, - PGNO(h), &LSN(h), (u_int32_t)epg->indx, adjust, - PGNO(h) == root_pgno ? CAD_UPDATEROOT : 0)) != 0) - return (ret); + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cadjust_log(dbp, dbc->txn, + &LSN(h), 0, PGNO(h), &LSN(h), + (u_int32_t)epg->indx, adjust, + PGNO(h) == root_pgno ? 
+ CAD_UPDATEROOT : 0)) != 0) + return (ret); + } else + LSN_NOT_LOGGED(LSN(h)); if (TYPE(h) == P_IBTREE) - GET_BINTERNAL(h, epg->indx)->nrecs += adjust; + GET_BINTERNAL(dbp, h, epg->indx)->nrecs += + adjust; else - GET_RINTERNAL(h, epg->indx)->nrecs += adjust; + GET_RINTERNAL(dbp, h, epg->indx)->nrecs += + adjust; if (PGNO(h) == root_pgno) RE_NREC_ADJ(h, adjust); - if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + if ((ret = mpf->set(mpf, h, DB_MPOOL_DIRTY)) != 0) return (ret); } } @@ -363,21 +372,23 @@ __bam_nrecs(dbc, rep) { DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; PAGE *h; db_pgno_t pgno; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; pgno = dbc->internal->root; if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) return (ret); *rep = RE_NREC(h); - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__TLPUT(dbc, lock); return (0); @@ -387,10 +398,11 @@ __bam_nrecs(dbc, rep) * __bam_total -- * Return the number of records below a page. * - * PUBLIC: db_recno_t __bam_total __P((PAGE *)); + * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *)); */ db_recno_t -__bam_total(h) +__bam_total(dbp, h) + DB *dbp; PAGE *h; { db_recno_t nrecs; @@ -403,25 +415,26 @@ __bam_total(h) case P_LBTREE: /* Check for logically deleted records. */ for (indx = 0; indx < top; indx += P_INDX) - if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) + if (!B_DISSET( + GET_BKEYDATA(dbp, h, indx + O_INDX)->type)) ++nrecs; break; case P_LDUP: /* Check for logically deleted records. 
*/ for (indx = 0; indx < top; indx += O_INDX) - if (!B_DISSET(GET_BKEYDATA(h, indx)->type)) + if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) ++nrecs; break; case P_IBTREE: for (indx = 0; indx < top; indx += O_INDX) - nrecs += GET_BINTERNAL(h, indx)->nrecs; + nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs; break; case P_LRECNO: nrecs = NUM_ENT(h); break; case P_IRECNO: for (indx = 0; indx < top; indx += O_INDX) - nrecs += GET_RINTERNAL(h, indx)->nrecs; + nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs; break; } diff --git a/bdb/btree/bt_search.c b/bdb/btree/bt_search.c index d822198f243..92b2106311d 100644 --- a/bdb/btree/bt_search.c +++ b/bdb/btree/bt_search.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic Exp $"; +static const char revid[] = "$Id: bt_search.c,v 11.43 2002/07/03 19:03:50 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,21 +53,22 @@ static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "lock.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" /* * __bam_search -- * Search a btree for a key. 
* - * PUBLIC: int __bam_search __P((DBC *, + * PUBLIC: int __bam_search __P((DBC *, db_pgno_t, * PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *)); */ int -__bam_search(dbc, key, flags, stop, recnop, exactp) +__bam_search(dbc, root_pgno, key, flags, stop, recnop, exactp) DBC *dbc; + db_pgno_t root_pgno; const DBT *key; u_int32_t flags; int stop, *exactp; @@ -77,8 +78,9 @@ __bam_search(dbc, key, flags, stop, recnop, exactp) BTREE_CURSOR *cp; DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; PAGE *h; - db_indx_t base, i, indx, lim; + db_indx_t base, i, indx, *inp, lim; db_lockmode_t lock_mode; db_pgno_t pg; db_recno_t recno; @@ -86,6 +88,7 @@ __bam_search(dbc, key, flags, stop, recnop, exactp) int (*func) __P((DB *, const DBT *, const DBT *)); dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; t = dbp->bt_internal; recno = 0; @@ -109,12 +112,12 @@ __bam_search(dbc, key, flags, stop, recnop, exactp) * Retrieve the root page. */ try_again: - pg = cp->root; + pg = root_pgno == PGNO_INVALID ? cp->root : root_pgno; stack = LF_ISSET(S_STACK) && F_ISSET(cp, C_RECNUM); lock_mode = stack ? 
DB_LOCK_WRITE : DB_LOCK_READ; if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) { /* Did not read it, so we can release the lock */ (void)__LPUT(dbc, lock); return (ret); @@ -131,21 +134,21 @@ try_again: if (!stack && ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__LPUT(dbc, lock); lock_mode = DB_LOCK_WRITE; if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) { /* Did not read it, so we can release the lock */ (void)__LPUT(dbc, lock); return (ret); } - if (!((LF_ISSET(S_PARENT) - && (u_int8_t)(stop + 1) >= h->level) || + if (!((LF_ISSET(S_PARENT) && + (u_int8_t)(stop + 1) >= h->level) || (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { /* Someone else split the root, start over. */ - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__LPUT(dbc, lock); goto try_again; } @@ -158,6 +161,7 @@ try_again: t->bt_compare; for (;;) { + inp = P_INP(dbp, h); /* * Do a binary search on the current page. 
If we're searching * a Btree leaf page, we have to walk the indices in groups of @@ -199,7 +203,7 @@ try_again: if (LF_ISSET(S_STK_ONLY)) { BT_STK_NUM(dbp->dbenv, cp, h, base, ret); __LPUT(dbc, lock); - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); return (ret); } @@ -232,21 +236,21 @@ try_again: */ next: if (recnop != NULL) for (i = 0; i < indx; ++i) - recno += GET_BINTERNAL(h, i)->nrecs; + recno += GET_BINTERNAL(dbp, h, i)->nrecs; - pg = GET_BINTERNAL(h, indx)->pgno; + pg = GET_BINTERNAL(dbp, h, indx)->pgno; if (LF_ISSET(S_STK_ONLY)) { if (stop == h->level) { BT_STK_NUM(dbp->dbenv, cp, h, indx, ret); __LPUT(dbc, lock); - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); return (ret); } BT_STK_NUMPUSH(dbp->dbenv, cp, h, indx, ret); - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); if ((ret = __db_lget(dbc, - LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) { + LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) { /* * Discard our lock and return on failure. This * is OK because it only happens when descending @@ -284,12 +288,12 @@ next: if (recnop != NULL) (h->level - 1) == LEAFLEVEL) stack = 1; - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); lock_mode = stack && LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ; if ((ret = __db_lget(dbc, - LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) { + LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) { /* * If we fail, discard the lock we held. 
This * is OK because this only happens when we are @@ -299,7 +303,7 @@ next: if (recnop != NULL) goto err; } } - if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pg, 0, &h)) != 0) goto err; } /* NOTREACHED */ @@ -327,11 +331,11 @@ found: *exactp = 1; if (TYPE(h) == P_LBTREE) { if (LF_ISSET(S_DUPLAST)) while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && - h->inp[indx] == h->inp[indx + P_INDX]) + inp[indx] == inp[indx + P_INDX]) indx += P_INDX; else while (indx > 0 && - h->inp[indx] == h->inp[indx - P_INDX]) + inp[indx] == inp[indx - P_INDX]) indx -= P_INDX; } @@ -344,29 +348,29 @@ found: *exactp = 1; if (LF_ISSET(S_DELNO)) { deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0; if (LF_ISSET(S_DUPLAST)) - while (B_DISSET(GET_BKEYDATA( + while (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type) && indx > 0 && - h->inp[indx] == h->inp[indx - adjust]) + inp[indx] == inp[indx - adjust]) indx -= adjust; else - while (B_DISSET(GET_BKEYDATA( + while (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type) && indx < (db_indx_t)(NUM_ENT(h) - adjust) && - h->inp[indx] == h->inp[indx + adjust]) + inp[indx] == inp[indx + adjust]) indx += adjust; /* * If we weren't able to find a non-deleted duplicate, return * DB_NOTFOUND. */ - if (B_DISSET(GET_BKEYDATA(h, indx + deloffset)->type)) + if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) goto notfound; } if (LF_ISSET(S_STK_ONLY)) { BT_STK_NUM(dbp->dbenv, cp, h, indx, ret); __LPUT(dbc, lock); - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); } else { BT_STK_ENTER(dbp->dbenv, cp, h, indx, lock, lock_mode, ret); if (ret != 0) @@ -376,7 +380,7 @@ found: *exactp = 1; notfound: /* Keep the page locked for serializability. 
*/ - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); (void)__TLPUT(dbc, lock); ret = DB_NOTFOUND; @@ -398,10 +402,12 @@ __bam_stkrel(dbc, flags) { BTREE_CURSOR *cp; DB *dbp; + DB_MPOOLFILE *mpf; EPG *epg; int ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; /* @@ -414,10 +420,10 @@ __bam_stkrel(dbc, flags) if (epg->page != NULL) { if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) { cp->page = NULL; - cp->lock.off = LOCK_INVALID; + LOCK_INIT(cp->lock); } - if ((t_ret = memp_fput( - dbp->mpf, epg->page, 0)) != 0 && ret == 0) + if ((t_ret = + mpf->put(mpf, epg->page, 0)) != 0 && ret == 0) ret = t_ret; /* * XXX @@ -428,12 +434,10 @@ __bam_stkrel(dbc, flags) */ epg->page = NULL; } - if (epg->lock.off != LOCK_INVALID) { - if (LF_ISSET(STK_NOLOCK)) - (void)__LPUT(dbc, epg->lock); - else - (void)__TLPUT(dbc, epg->lock); - } + if (LF_ISSET(STK_NOLOCK)) + (void)__LPUT(dbc, epg->lock); + else + (void)__TLPUT(dbc, epg->lock); } /* Clear the stack, all pages have been released. */ @@ -463,7 +467,7 @@ __bam_stkgrow(dbenv, cp) return (ret); memcpy(p, cp->sp, entries * sizeof(EPG)); if (cp->sp != cp->stack) - __os_free(cp->sp, entries * sizeof(EPG)); + __os_free(dbenv, cp->sp); cp->sp = p; cp->csp = p + entries; cp->esp = p + entries * 2; diff --git a/bdb/btree/bt_split.c b/bdb/btree/bt_split.c index f76337b1944..f3302a6905f 100644 --- a/bdb/btree/bt_split.c +++ b/bdb/btree/bt_split.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ /* @@ -40,7 +40,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic Exp $"; +static const char revid[] = "$Id: bt_split.c,v 11.58 2002/07/03 19:03:50 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -51,10 +51,10 @@ static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/btree.h" static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *)); static int __bam_page __P((DBC *, EPG *, EPG *)); @@ -67,21 +67,19 @@ static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *)); * __bam_split -- * Split a page. * - * PUBLIC: int __bam_split __P((DBC *, void *)); + * PUBLIC: int __bam_split __P((DBC *, void *, db_pgno_t *)); */ int -__bam_split(dbc, arg) +__bam_split(dbc, arg, root_pgnop) DBC *dbc; void *arg; + db_pgno_t *root_pgnop; { - BTREE *t; BTREE_CURSOR *cp; - DB *dbp; enum { UP, DOWN } dir; db_pgno_t root_pgno; int exact, level, ret; - dbp = dbc->dbp; cp = (BTREE_CURSOR *)dbc->internal; root_pgno = cp->root; @@ -112,17 +110,20 @@ __bam_split(dbc, arg) * split. This would be an easy change for this code, but I have no * numbers that indicate it's worthwhile. */ - t = dbp->bt_internal; for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) { /* * Acquire a page and its parent, locked. */ if ((ret = (dbc->dbtype == DB_BTREE ? - __bam_search(dbc, arg, S_WRPAIR, level, NULL, &exact) : + __bam_search(dbc, PGNO_INVALID, + arg, S_WRPAIR, level, NULL, &exact) : __bam_rsearch(dbc, (db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0) return (ret); + if (root_pgnop != NULL) + *root_pgnop = cp->csp[0].page->pgno == root_pgno ? 
+ root_pgno : cp->csp[-1].page->pgno; /* * Split the page if it still needs it (it's possible another * thread of control has already split the page). If we are @@ -130,7 +131,7 @@ __bam_split(dbc, arg) * is no longer necessary. */ if (2 * B_MAXSIZEONPAGE(cp->ovflsize) - <= (db_indx_t)P_FREESPACE(cp->csp[0].page)) { + <= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) { __bam_stkrel(dbc, STK_NOLOCK); return (0); } @@ -178,12 +179,14 @@ __bam_root(dbc, cp) DB *dbp; DBT log_dbt; DB_LSN log_lsn; + DB_MPOOLFILE *mpf; PAGE *lp, *rp; db_indx_t split; u_int32_t opflags; int ret; dbp = dbc->dbp; + mpf = dbp->mpf; /* Yeah, right. */ if (cp->page->level >= MAXBTREELEVEL) { @@ -210,21 +213,22 @@ __bam_root(dbc, cp) goto err; /* Log the change. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { memset(&log_dbt, 0, sizeof(log_dbt)); log_dbt.data = cp->page; log_dbt.size = dbp->pgsize; ZERO_LSN(log_lsn); opflags = F_ISSET( (BTREE_CURSOR *)dbc->internal, C_RECNUM) ? SPL_NRECS : 0; - if ((ret = __bam_split_log(dbp->dbenv, dbc->txn, - &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp), - PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn, + if ((ret = __bam_split_log(dbp, + dbc->txn, &LSN(cp->page), 0, PGNO(lp), &LSN(lp), PGNO(rp), + &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn, dbc->internal->root, &log_dbt, opflags)) != 0) goto err; - LSN(lp) = LSN(cp->page); - LSN(rp) = LSN(cp->page); - } + } else + LSN_NOT_LOGGED(LSN(cp->page)); + LSN(lp) = LSN(cp->page); + LSN(rp) = LSN(cp->page); /* Clean up the new root page. */ if ((ret = (dbc->dbtype == DB_RECNO ? @@ -238,18 +242,18 @@ __bam_root(dbc, cp) goto err; /* Success -- write the real pages back to the store. 
*/ - (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, cp->page, DB_MPOOL_DIRTY); (void)__TLPUT(dbc, cp->lock); - (void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY); - (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, lp, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, rp, DB_MPOOL_DIRTY); return (0); err: if (lp != NULL) - (void)__db_free(dbc, lp); + (void)mpf->put(mpf, lp, 0); if (rp != NULL) - (void)__db_free(dbc, rp); - (void)memp_fput(dbp->mpf, cp->page, 0); + (void)mpf->put(mpf, rp, 0); + (void)mpf->put(mpf, cp->page, 0); (void)__TLPUT(dbc, cp->lock); return (ret); } @@ -267,7 +271,8 @@ __bam_page(dbc, pp, cp) DBT log_dbt; DB_LSN log_lsn; DB *dbp; - DB_LOCK tplock; + DB_LOCK rplock, tplock; + DB_MPOOLFILE *mpf; DB_LSN save_lsn; PAGE *lp, *rp, *alloc_rp, *tp; db_indx_t split; @@ -275,8 +280,10 @@ __bam_page(dbc, pp, cp) int ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; alloc_rp = lp = rp = tp = NULL; - tplock.off = LOCK_INVALID; + LOCK_INIT(rplock); + LOCK_INIT(tplock); ret = -1; /* @@ -296,7 +303,7 @@ __bam_page(dbc, pp, cp) * up the tree badly, because we've violated the rule of always locking * down the tree, and never up. */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &rp)) != 0) + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &rp)) != 0) goto err; P_INIT(rp, dbp->pgsize, 0, ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page), @@ -307,7 +314,7 @@ __bam_page(dbc, pp, cp) * Create new left page for the split, and fill in everything * except its LSN and next-page page number. */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &lp)) != 0) + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &lp)) != 0) goto err; P_INIT(lp, dbp->pgsize, PGNO(cp->page), ISINTERNAL(cp->page) ? 
PGNO_INVALID : PREV_PGNO(cp->page), @@ -351,8 +358,7 @@ __bam_page(dbc, pp, cp) if ((ret = __db_lget(dbc, 0, NEXT_PGNO(cp->page), DB_LOCK_WRITE, 0, &tplock)) != 0) goto err; - if ((ret = - memp_fget(dbp->mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0) + if ((ret = mpf->get(mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0) goto err; } @@ -364,6 +370,15 @@ __bam_page(dbc, pp, cp) goto err; /* + * Lock the new page. We need to do this because someone + * could get here through bt_lpgno if this page was recently + * dealocated. They can't look at it before we commit. + */ + if ((ret = __db_lget(dbc, + 0, PGNO(alloc_rp), DB_LOCK_WRITE, 0, &rplock)) != 0) + goto err; + + /* * Fix up the page numbers we didn't have before. We have to do this * before calling __bam_pinsert because it may copy a page number onto * the parent page and it takes the page number from its page argument. @@ -376,29 +391,30 @@ __bam_page(dbc, pp, cp) bc = (BTREE_CURSOR *)dbc->internal; /* Log the change. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { memset(&log_dbt, 0, sizeof(log_dbt)); log_dbt.data = cp->page; log_dbt.size = dbp->pgsize; if (tp == NULL) ZERO_LSN(log_lsn); opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0; - if ((ret = __bam_split_log(dbp->dbenv, dbc->txn, - &LSN(cp->page), 0, dbp->log_fileid, PGNO(cp->page), - &LSN(cp->page), PGNO(alloc_rp), &LSN(alloc_rp), - (u_int32_t)NUM_ENT(lp), + if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0, + PGNO(cp->page), &LSN(cp->page), PGNO(alloc_rp), + &LSN(alloc_rp), (u_int32_t)NUM_ENT(lp), tp == NULL ? 0 : PGNO(tp), tp == NULL ? &log_lsn : &LSN(tp), - bc->root, &log_dbt, opflags)) != 0) + PGNO_INVALID, &log_dbt, opflags)) != 0) goto err; - /* Update the LSNs for all involved pages. */ - LSN(alloc_rp) = LSN(cp->page); - LSN(lp) = LSN(cp->page); - LSN(rp) = LSN(cp->page); - if (tp != NULL) - LSN(tp) = LSN(cp->page); - } + } else + LSN_NOT_LOGGED(LSN(cp->page)); + + /* Update the LSNs for all involved pages. 
*/ + LSN(alloc_rp) = LSN(cp->page); + LSN(lp) = LSN(cp->page); + LSN(rp) = LSN(cp->page); + if (tp != NULL) + LSN(tp) = LSN(cp->page); /* * Copy the left and right pages into place. There are two paths @@ -411,13 +427,13 @@ __bam_page(dbc, pp, cp) * do the copy. */ save_lsn = alloc_rp->lsn; - memcpy(alloc_rp, rp, LOFFSET(rp)); + memcpy(alloc_rp, rp, LOFFSET(dbp, rp)); memcpy((u_int8_t *)alloc_rp + HOFFSET(rp), (u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp)); alloc_rp->lsn = save_lsn; save_lsn = cp->page->lsn; - memcpy(cp->page, lp, LOFFSET(lp)); + memcpy(cp->page, lp, LOFFSET(dbp, lp)); memcpy((u_int8_t *)cp->page + HOFFSET(lp), (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp)); cp->page->lsn = save_lsn; @@ -431,8 +447,8 @@ __bam_page(dbc, pp, cp) PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0) goto err; - __os_free(lp, dbp->pgsize); - __os_free(rp, dbp->pgsize); + __os_free(dbp->dbenv, lp); + __os_free(dbp->dbenv, rp); /* * Success -- write the real pages back to the store. As we never @@ -440,45 +456,43 @@ __bam_page(dbc, pp, cp) * releasing locks on the pages that reference it. We're finished * modifying the page so it's not really necessary, but it's neater. 
*/ - if ((t_ret = - memp_fput(dbp->mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0) + if ((t_ret = mpf->put(mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; - if ((t_ret = - memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) + (void)__TLPUT(dbc, rplock); + if ((t_ret = mpf->put(mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; (void)__TLPUT(dbc, pp->lock); - if ((t_ret = - memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) + if ((t_ret = mpf->put(mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; (void)__TLPUT(dbc, cp->lock); if (tp != NULL) { if ((t_ret = - memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0) + mpf->put(mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; (void)__TLPUT(dbc, tplock); } return (ret); err: if (lp != NULL) - __os_free(lp, dbp->pgsize); + __os_free(dbp->dbenv, lp); if (rp != NULL) - __os_free(rp, dbp->pgsize); + __os_free(dbp->dbenv, rp); if (alloc_rp != NULL) - (void)__db_free(dbc, alloc_rp); - + (void)mpf->put(mpf, alloc_rp, 0); if (tp != NULL) - (void)memp_fput(dbp->mpf, tp, 0); - if (tplock.off != LOCK_INVALID) - /* We never updated the next page, we can release it. */ - (void)__LPUT(dbc, tplock); + (void)mpf->put(mpf, tp, 0); + + /* We never updated the new or next pages, we can release them. 
*/ + (void)__LPUT(dbc, rplock); + (void)__LPUT(dbc, tplock); - (void)memp_fput(dbp->mpf, pp->page, 0); + (void)mpf->put(mpf, pp->page, 0); if (ret == DB_NEEDSPLIT) (void)__LPUT(dbc, pp->lock); else (void)__TLPUT(dbc, pp->lock); - (void)memp_fput(dbp->mpf, cp->page, 0); + (void)mpf->put(mpf, cp->page, 0); if (ret == DB_NEEDSPLIT) (void)__LPUT(dbc, cp->lock); else @@ -529,7 +543,7 @@ __bam_broot(dbc, rootp, lp, rp) B_TSET(bi.type, B_KEYDATA, 0); bi.pgno = lp->pgno; if (F_ISSET(cp, C_RECNUM)) { - bi.nrecs = __bam_total(lp); + bi.nrecs = __bam_total(dbp, lp); RE_NREC_SET(rootp, bi.nrecs); } hdr.data = &bi; @@ -541,13 +555,13 @@ __bam_broot(dbc, rootp, lp, rp) switch (TYPE(rp)) { case P_IBTREE: /* Copy the first key of the child page onto the root page. */ - child_bi = GET_BINTERNAL(rp, 0); + child_bi = GET_BINTERNAL(dbp, rp, 0); bi.len = child_bi->len; B_TSET(bi.type, child_bi->type, 0); bi.pgno = rp->pgno; if (F_ISSET(cp, C_RECNUM)) { - bi.nrecs = __bam_total(rp); + bi.nrecs = __bam_total(dbp, rp); RE_NREC_ADJ(rootp, bi.nrecs); } hdr.data = &bi; @@ -567,14 +581,14 @@ __bam_broot(dbc, rootp, lp, rp) case P_LDUP: case P_LBTREE: /* Copy the first key of the child page onto the root page. 
*/ - child_bk = GET_BKEYDATA(rp, 0); + child_bk = GET_BKEYDATA(dbp, rp, 0); switch (B_TYPE(child_bk->type)) { case B_KEYDATA: bi.len = child_bk->len; B_TSET(bi.type, child_bk->type, 0); bi.pgno = rp->pgno; if (F_ISSET(cp, C_RECNUM)) { - bi.nrecs = __bam_total(rp); + bi.nrecs = __bam_total(dbp, rp); RE_NREC_ADJ(rootp, bi.nrecs); } hdr.data = &bi; @@ -591,7 +605,7 @@ __bam_broot(dbc, rootp, lp, rp) B_TSET(bi.type, child_bk->type, 0); bi.pgno = rp->pgno; if (F_ISSET(cp, C_RECNUM)) { - bi.nrecs = __bam_total(rp); + bi.nrecs = __bam_total(dbp, rp); RE_NREC_ADJ(rootp, bi.nrecs); } hdr.data = &bi; @@ -609,11 +623,11 @@ __bam_broot(dbc, rootp, lp, rp) return (ret); break; default: - return (__db_pgfmt(dbp, rp->pgno)); + return (__db_pgfmt(dbp->dbenv, rp->pgno)); } break; default: - return (__db_pgfmt(dbp, rp->pgno)); + return (__db_pgfmt(dbp->dbenv, rp->pgno)); } return (0); } @@ -647,12 +661,12 @@ __ram_root(dbc, rootp, lp, rp) /* Insert the left and right keys, set the header information. */ ri.pgno = lp->pgno; - ri.nrecs = __bam_total(lp); + ri.nrecs = __bam_total(dbp, lp); if ((ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0) return (ret); RE_NREC_SET(rootp, ri.nrecs); ri.pgno = rp->pgno; - ri.nrecs = __bam_total(rp); + ri.nrecs = __bam_total(dbp, rp); if ((ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0) return (ret); RE_NREC_ADJ(rootp, ri.nrecs); @@ -690,7 +704,8 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check) ppage = parent->page; /* If handling record numbers, count records split to the right page. */ - nrecs = F_ISSET(cp, C_RECNUM) && !space_check ? __bam_total(rchild) : 0; + nrecs = F_ISSET(cp, C_RECNUM) && + !space_check ? 
__bam_total(dbp, rchild) : 0; /* * Now we insert the new page's first key into the parent page, which @@ -721,10 +736,10 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check) */ switch (TYPE(rchild)) { case P_IBTREE: - child_bi = GET_BINTERNAL(rchild, 0); + child_bi = GET_BINTERNAL(dbp, rchild, 0); nbytes = BINTERNAL_PSIZE(child_bi->len); - if (P_FREESPACE(ppage) < nbytes) + if (P_FREESPACE(dbp, ppage) < nbytes) return (DB_NEEDSPLIT); if (space_check) return (0); @@ -753,7 +768,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check) break; case P_LDUP: case P_LBTREE: - child_bk = GET_BKEYDATA(rchild, 0); + child_bk = GET_BKEYDATA(dbp, rchild, 0); switch (B_TYPE(child_bk->type)) { case B_KEYDATA: /* @@ -783,7 +798,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check) goto noprefix; if (ppage->prev_pgno == PGNO_INVALID && off <= 1) goto noprefix; - tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) - + tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) - (TYPE(lchild) == P_LDUP ? 
O_INDX : P_INDX)); if (B_TYPE(tmp_bk->type) != B_KEYDATA) goto noprefix; @@ -793,13 +808,13 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check) memset(&b, 0, sizeof(b)); b.size = child_bk->len; b.data = child_bk->data; - nksize = func(dbp, &a, &b); + nksize = (u_int32_t)func(dbp, &a, &b); if ((n = BINTERNAL_PSIZE(nksize)) < nbytes) nbytes = n; else noprefix: nksize = child_bk->len; - if (P_FREESPACE(ppage) < nbytes) + if (P_FREESPACE(dbp, ppage) < nbytes) return (DB_NEEDSPLIT); if (space_check) return (0); @@ -823,7 +838,7 @@ noprefix: nksize = child_bk->len; case B_OVERFLOW: nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE); - if (P_FREESPACE(ppage) < nbytes) + if (P_FREESPACE(dbp, ppage) < nbytes) return (DB_NEEDSPLIT); if (space_check) return (0); @@ -850,14 +865,14 @@ noprefix: nksize = child_bk->len; return (ret); break; default: - return (__db_pgfmt(dbp, rchild->pgno)); + return (__db_pgfmt(dbp->dbenv, rchild->pgno)); } break; case P_IRECNO: case P_LRECNO: nbytes = RINTERNAL_PSIZE; - if (P_FREESPACE(ppage) < nbytes) + if (P_FREESPACE(dbp, ppage) < nbytes) return (DB_NEEDSPLIT); if (space_check) return (0); @@ -873,7 +888,7 @@ noprefix: nksize = child_bk->len; return (ret); break; default: - return (__db_pgfmt(dbp, rchild->pgno)); + return (__db_pgfmt(dbp->dbenv, rchild->pgno)); } /* @@ -882,17 +897,19 @@ noprefix: nksize = child_bk->len; */ if (F_ISSET(cp, C_RECNUM)) { /* Log the change. */ - if (DB_LOGGING(dbc) && - (ret = __bam_cadjust_log(dbp->dbenv, dbc->txn, - &LSN(ppage), 0, dbp->log_fileid, PGNO(ppage), + if (DBC_LOGGING(dbc)) { + if ((ret = __bam_cadjust_log(dbp, dbc->txn, + &LSN(ppage), 0, PGNO(ppage), &LSN(ppage), parent->indx, -(int32_t)nrecs, 0)) != 0) return (ret); + } else + LSN_NOT_LOGGED(LSN(ppage)); /* Update the left page count. 
*/ if (dbc->dbtype == DB_RECNO) - GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs; + GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs; else - GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs; + GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs; } return (0); @@ -911,28 +928,52 @@ __bam_psplit(dbc, cp, lp, rp, splitret) { DB *dbp; PAGE *pp; - db_indx_t half, nbytes, off, splitp, top; + db_indx_t half, *inp, nbytes, off, splitp, top; int adjust, cnt, iflag, isbigkey, ret; dbp = dbc->dbp; pp = cp->page; + inp = P_INP(dbp, pp); adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX; /* * If we're splitting the first (last) page on a level because we're * inserting (appending) a key to it, it's likely that the data is * sorted. Moving a single item to the new page is less work and can - * push the fill factor higher than normal. If we're wrong it's not - * a big deal, we'll just do the split the right way next time. + * push the fill factor higher than normal. This is trivial when we + * are splitting a new page before the beginning of the tree, all of + * the interesting tests are against values of 0. + * + * Catching appends to the tree is harder. In a simple append, we're + * inserting an item that sorts past the end of the tree; the cursor + * will point past the last element on the page. But, in trees with + * duplicates, the cursor may point to the last entry on the page -- + * in this case, the entry will also be the last element of a duplicate + * set (the last because the search call specified the S_DUPLAST flag). + * The only way to differentiate between an insert immediately before + * the last item in a tree or an append after a duplicate set which is + * also the last item in the tree is to call the comparison function. + * When splitting internal pages during an append, the search code + * guarantees the cursor always points to the largest page item less + * than the new internal entry. 
To summarize, we want to catch three + * possible index values: + * + * NUM_ENT(page) Btree/Recno leaf insert past end-of-tree + * NUM_ENT(page) - O_INDX Btree or Recno internal insert past EOT + * NUM_ENT(page) - P_INDX Btree leaf insert past EOT after a set + * of duplicates + * + * two of which, (NUM_ENT(page) - O_INDX or P_INDX) might be an insert + * near the end of the tree, and not after the end of the tree at all. + * Do a simple test which might be wrong because calling the comparison + * functions is expensive. Regardless, it's not a big deal if we're + * wrong, we'll do the split the right way next time. */ off = 0; - if (NEXT_PGNO(pp) == PGNO_INVALID && - ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) || - (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page)))) - off = NUM_ENT(cp->page) - adjust; + if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust) + off = NUM_ENT(pp) - adjust; else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0) off = adjust; - if (off != 0) goto sort; @@ -962,16 +1003,18 @@ __bam_psplit(dbc, cp, lp, rp, splitret) for (nbytes = 0, off = 0; off < top && nbytes < half; ++off) switch (TYPE(pp)) { case P_IBTREE: - if (B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA) - nbytes += - BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len); + if (B_TYPE( + GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA) + nbytes += BINTERNAL_SIZE( + GET_BINTERNAL(dbp, pp, off)->len); else nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE); break; case P_LBTREE: - if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) - nbytes += - BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len); + if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) + nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, off)->len); else nbytes += BOVERFLOW_SIZE; @@ -979,9 +1022,10 @@ __bam_psplit(dbc, cp, lp, rp, splitret) /* FALLTHROUGH */ case P_LDUP: case P_LRECNO: - if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) - nbytes += - BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len); + if 
(B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) + nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, off)->len); else nbytes += BOVERFLOW_SIZE; break; @@ -989,7 +1033,7 @@ __bam_psplit(dbc, cp, lp, rp, splitret) nbytes += RINTERNAL_SIZE; break; default: - return (__db_pgfmt(dbp, pp->pgno)); + return (__db_pgfmt(dbp->dbenv, pp->pgno)); } sort: splitp = off; @@ -1002,12 +1046,14 @@ sort: splitp = off; switch (TYPE(pp)) { case P_IBTREE: iflag = 1; - isbigkey = B_TYPE(GET_BINTERNAL(pp, off)->type) != B_KEYDATA; + isbigkey = + B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA; break; case P_LBTREE: case P_LDUP: iflag = 0; - isbigkey = B_TYPE(GET_BKEYDATA(pp, off)->type) != B_KEYDATA; + isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) != + B_KEYDATA; break; default: iflag = isbigkey = 0; @@ -1016,18 +1062,20 @@ sort: splitp = off; for (cnt = 1; cnt <= 3; ++cnt) { off = splitp + cnt * adjust; if (off < (db_indx_t)NUM_ENT(pp) && - ((iflag && - B_TYPE(GET_BINTERNAL(pp,off)->type) == B_KEYDATA) || - B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)) { + ((iflag && B_TYPE( + GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) || + B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA)) { splitp = off; break; } if (splitp <= (db_indx_t)(cnt * adjust)) continue; off = splitp - cnt * adjust; - if (iflag ? - B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA : - B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) { + if (iflag ? B_TYPE( + GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA : + B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == + B_KEYDATA) { splitp = off; break; } @@ -1040,18 +1088,18 @@ sort: splitp = off; * page set. So, this loop can't be unbounded. 
*/ if (TYPE(pp) == P_LBTREE && - pp->inp[splitp] == pp->inp[splitp - adjust]) + inp[splitp] == inp[splitp - adjust]) for (cnt = 1;; ++cnt) { off = splitp + cnt * adjust; if (off < NUM_ENT(pp) && - pp->inp[splitp] != pp->inp[off]) { + inp[splitp] != inp[off]) { splitp = off; break; } if (splitp <= (db_indx_t)(cnt * adjust)) continue; off = splitp - cnt * adjust; - if (pp->inp[splitp] != pp->inp[off]) { + if (inp[splitp] != inp[off]) { splitp = off + adjust; break; } @@ -1079,18 +1127,20 @@ __bam_copy(dbp, pp, cp, nxt, stop) PAGE *pp, *cp; u_int32_t nxt, stop; { - db_indx_t nbytes, off; + db_indx_t *cinp, nbytes, off, *pinp; + cinp = P_INP(dbp, cp); + pinp = P_INP(dbp, pp); /* - * Copy the rest of the data to the right page. Nxt is the next - * offset placed on the target page. + * Nxt is the offset of the next record to be placed on the target page. */ for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) { switch (TYPE(pp)) { case P_IBTREE: - if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA) - nbytes = - BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len); + if (B_TYPE( + GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA) + nbytes = BINTERNAL_SIZE( + GET_BINTERNAL(dbp, pp, nxt)->len); else nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE); break; @@ -1100,16 +1150,17 @@ __bam_copy(dbp, pp, cp, nxt, stop) * the offset. 
*/ if (off != 0 && (nxt % P_INDX) == 0 && - pp->inp[nxt] == pp->inp[nxt - P_INDX]) { - cp->inp[off] = cp->inp[off - P_INDX]; + pinp[nxt] == pinp[nxt - P_INDX]) { + cinp[off] = cinp[off - P_INDX]; continue; } /* FALLTHROUGH */ case P_LDUP: case P_LRECNO: - if (B_TYPE(GET_BKEYDATA(pp, nxt)->type) == B_KEYDATA) - nbytes = - BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len); + if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) == + B_KEYDATA) + nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp, + pp, nxt)->len); else nbytes = BOVERFLOW_SIZE; break; @@ -1117,10 +1168,10 @@ __bam_copy(dbp, pp, cp, nxt, stop) nbytes = RINTERNAL_SIZE; break; default: - return (__db_pgfmt(dbp, pp->pgno)); + return (__db_pgfmt(dbp->dbenv, pp->pgno)); } - cp->inp[off] = HOFFSET(cp) -= nbytes; - memcpy(P_ENTRY(cp, off), P_ENTRY(pp, nxt), nbytes); + cinp[off] = HOFFSET(cp) -= nbytes; + memcpy(P_ENTRY(dbp, cp, off), P_ENTRY(dbp, pp, nxt), nbytes); } return (0); } diff --git a/bdb/btree/bt_stat.c b/bdb/btree/bt_stat.c index 349bb40cf8b..4428de98294 100644 --- a/bdb/btree/bt_stat.c +++ b/bdb/btree/bt_stat.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
*/ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic Exp $"; +static const char revid[] = "$Id: bt_stat.c,v 11.52 2002/05/30 15:40:27 krinsky Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,22 +18,22 @@ static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic E #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" /* * __bam_stat -- * Gather/print the btree statistics * - * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); + * PUBLIC: int __bam_stat __P((DB *, void *, u_int32_t)); */ int -__bam_stat(dbp, spp, db_malloc, flags) +__bam_stat(dbp, spp, flags) DB *dbp; void *spp; - void *(*db_malloc) __P((size_t)); u_int32_t flags; { BTMETA *meta; @@ -42,9 +42,10 @@ __bam_stat(dbp, spp, db_malloc, flags) DBC *dbc; DB_BTREE_STAT *sp; DB_LOCK lock, metalock; + DB_MPOOLFILE *mpf; PAGE *h; db_pgno_t pgno; - int ret, t_ret; + int ret, t_ret, write_meta; PANIC_CHECK(dbp->dbenv); DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); @@ -52,9 +53,12 @@ __bam_stat(dbp, spp, db_malloc, flags) meta = NULL; t = dbp->bt_internal; sp = NULL; - metalock.off = lock.off = LOCK_INVALID; + LOCK_INIT(metalock); + LOCK_INIT(lock); + mpf = dbp->mpf; h = NULL; ret = 0; + write_meta = 0; /* Check for invalid flags. */ if ((ret = __db_statchk(dbp, flags)) != 0) @@ -68,52 +72,31 @@ __bam_stat(dbp, spp, db_malloc, flags) DEBUG_LWRITE(dbc, NULL, "bam_stat", NULL, NULL, flags); /* Allocate and clear the structure. */ - if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0) + if ((ret = __os_umalloc(dbp->dbenv, sizeof(*sp), &sp)) != 0) goto err; memset(sp, 0, sizeof(*sp)); - /* If the app just wants the record count, make it fast. 
*/ - if (flags == DB_RECORDCOUNT) { - if ((ret = __db_lget(dbc, 0, - cp->root, DB_LOCK_READ, 0, &lock)) != 0) - goto err; - if ((ret = memp_fget(dbp->mpf, - &cp->root, 0, (PAGE **)&h)) != 0) - goto err; - - sp->bt_nkeys = RE_NREC(h); - - goto done; - } - if (flags == DB_CACHED_COUNTS) { - if ((ret = __db_lget(dbc, - 0, t->bt_meta, DB_LOCK_READ, 0, &lock)) != 0) - goto err; - if ((ret = - memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0) - goto err; - sp->bt_nkeys = meta->dbmeta.key_count; - sp->bt_ndata = meta->dbmeta.record_count; - - goto done; - } - /* Get the metadata page for the entire database. */ pgno = PGNO_BASE_MD; if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &metalock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0) goto err; + if (flags == DB_RECORDCOUNT || flags == DB_CACHED_COUNTS) + flags = DB_FAST_STAT; + if (flags == DB_FAST_STAT) + goto meta_only; + /* Walk the metadata free list, counting pages. */ for (sp->bt_free = 0, pgno = meta->dbmeta.free; pgno != PGNO_INVALID;) { ++sp->bt_free; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; pgno = h->next_pgno; - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) goto err; h = NULL; } @@ -122,14 +105,14 @@ __bam_stat(dbp, spp, db_malloc, flags) pgno = cp->root; if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; /* Get the levels from the root page. */ sp->bt_levels = h->level; /* Discard the root page. 
*/ - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) goto err; h = NULL; __LPUT(dbc, lock); @@ -143,20 +126,36 @@ __bam_stat(dbp, spp, db_malloc, flags) * Get the subdatabase metadata page if it's not the same as the * one we already have. */ - if (t->bt_meta != PGNO_BASE_MD || !F_ISSET(dbp, DB_AM_RDONLY)) { - if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0) + write_meta = !F_ISSET(dbp, DB_AM_RDONLY); +meta_only: + if (t->bt_meta != PGNO_BASE_MD || write_meta != 0) { + if ((ret = mpf->put(mpf, meta, 0)) != 0) goto err; meta = NULL; __LPUT(dbc, metalock); if ((ret = __db_lget(dbc, - 0, t->bt_meta, F_ISSET(dbp, DB_AM_RDONLY) ? + 0, t->bt_meta, write_meta == 0 ? DB_LOCK_READ : DB_LOCK_WRITE, 0, &metalock)) != 0) goto err; - if ((ret = - memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0) + if ((ret = mpf->get(mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0) goto err; } + if (flags == DB_FAST_STAT) { + if (dbp->type == DB_RECNO || + (dbp->type == DB_BTREE && F_ISSET(dbp, DB_AM_RECNUM))) { + if ((ret = __db_lget(dbc, 0, + cp->root, DB_LOCK_READ, 0, &lock)) != 0) + goto err; + if ((ret = + mpf->get(mpf, &cp->root, 0, (PAGE **)&h)) != 0) + goto err; + + sp->bt_nkeys = RE_NREC(h); + } else + sp->bt_nkeys = meta->dbmeta.key_count; + sp->bt_ndata = meta->dbmeta.record_count; + } /* Get metadata page statistics. */ sp->bt_metaflags = meta->dbmeta.flags; @@ -167,39 +166,33 @@ __bam_stat(dbp, spp, db_malloc, flags) sp->bt_pagesize = meta->dbmeta.pagesize; sp->bt_magic = meta->dbmeta.magic; sp->bt_version = meta->dbmeta.version; - if (!F_ISSET(dbp, DB_AM_RDONLY)) { + + if (write_meta != 0) { meta->dbmeta.key_count = sp->bt_nkeys; meta->dbmeta.record_count = sp->bt_ndata; } - /* Discard the metadata page. */ - if ((ret = memp_fput(dbp->mpf, - meta, F_ISSET(dbp, DB_AM_RDONLY) ? 
0 : DB_MPOOL_DIRTY)) != 0) - goto err; - meta = NULL; - __LPUT(dbc, metalock); - -done: *(DB_BTREE_STAT **)spp = sp; - - if (0) { -err: if (sp != NULL) - __os_free(sp, sizeof(*sp)); - } + *(DB_BTREE_STAT **)spp = sp; - if (h != NULL && - (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0) +err: /* Discard the second page. */ + __LPUT(dbc, lock); + if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0) ret = t_ret; - if (meta != NULL && - (t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 && ret == 0) + /* Discard the metadata page. */ + __LPUT(dbc, metalock); + if (meta != NULL && (t_ret = mpf->put( + mpf, meta, write_meta == 0 ? 0 : DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; - if (lock.off != LOCK_INVALID) - __LPUT(dbc, lock); - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) ret = t_ret; + if (ret != 0 && sp != NULL) { + __os_ufree(dbp->dbenv, sp); + *(DB_BTREE_STAT **)spp = NULL; + } + return (ret); } @@ -222,22 +215,27 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie) BKEYDATA *bk; DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; PAGE *h; RINTERNAL *ri; db_indx_t indx; int already_put, ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; + already_put = 0; if ((ret = __db_lget(dbc, 0, root_pgno, mode, 0, &lock)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0) - goto err; + if ((ret = mpf->get(mpf, &root_pgno, 0, &h)) != 0) { + __LPUT(dbc, lock); + return (ret); + } switch (TYPE(h)) { case P_IBTREE: for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { - bi = GET_BINTERNAL(h, indx); + bi = GET_BINTERNAL(dbp, h, indx); if (B_TYPE(bi->type) == B_OVERFLOW && (ret = __db_traverse_big(dbp, ((BOVERFLOW *)bi->data)->pgno, @@ -245,34 +243,34 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie) goto err; if ((ret = __bam_traverse( dbc, mode, bi->pgno, callback, cookie)) != 0) - break; + goto err; } break; case P_IRECNO: for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { - ri = GET_RINTERNAL(h, indx); + ri = GET_RINTERNAL(dbp, h, 
indx); if ((ret = __bam_traverse( dbc, mode, ri->pgno, callback, cookie)) != 0) - break; + goto err; } break; case P_LBTREE: for (indx = 0; indx < NUM_ENT(h); indx += P_INDX) { - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); if (B_TYPE(bk->type) == B_OVERFLOW && (ret = __db_traverse_big(dbp, - GET_BOVERFLOW(h, indx)->pgno, + GET_BOVERFLOW(dbp, h, indx)->pgno, callback, cookie)) != 0) goto err; - bk = GET_BKEYDATA(h, indx + O_INDX); + bk = GET_BKEYDATA(dbp, h, indx + O_INDX); if (B_TYPE(bk->type) == B_DUPLICATE && (ret = __bam_traverse(dbc, mode, - GET_BOVERFLOW(h, indx + O_INDX)->pgno, + GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno, callback, cookie)) != 0) goto err; if (B_TYPE(bk->type) == B_OVERFLOW && (ret = __db_traverse_big(dbp, - GET_BOVERFLOW(h, indx + O_INDX)->pgno, + GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno, callback, cookie)) != 0) goto err; } @@ -280,22 +278,19 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie) case P_LDUP: case P_LRECNO: for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) { - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); if (B_TYPE(bk->type) == B_OVERFLOW && (ret = __db_traverse_big(dbp, - GET_BOVERFLOW(h, indx)->pgno, + GET_BOVERFLOW(dbp, h, indx)->pgno, callback, cookie)) != 0) goto err; } break; } - already_put = 0; - if ((ret = callback(dbp, h, cookie, &already_put)) != 0) - goto err; + ret = callback(dbp, h, cookie, &already_put); -err: if (!already_put && - (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret != 0) +err: if (!already_put && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret != 0) ret = t_ret; __LPUT(dbc, lock); @@ -316,33 +311,34 @@ __bam_stat_callback(dbp, h, cookie, putp) int *putp; { DB_BTREE_STAT *sp; - db_indx_t indx, top; + db_indx_t indx, *inp, top; u_int8_t type; sp = cookie; *putp = 0; top = NUM_ENT(h); + inp = P_INP(dbp, h); switch (TYPE(h)) { case P_IBTREE: case P_IRECNO: ++sp->bt_int_pg; - sp->bt_int_pgfree += P_FREESPACE(h); + sp->bt_int_pgfree += P_FREESPACE(dbp, h); break; 
case P_LBTREE: /* Correct for on-page duplicates and deleted items. */ for (indx = 0; indx < top; indx += P_INDX) { if (indx + P_INDX >= top || - h->inp[indx] != h->inp[indx + P_INDX]) + inp[indx] != inp[indx + P_INDX]) ++sp->bt_nkeys; - type = GET_BKEYDATA(h, indx + O_INDX)->type; + type = GET_BKEYDATA(dbp, h, indx + O_INDX)->type; if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE) ++sp->bt_ndata; } ++sp->bt_leaf_pg; - sp->bt_leaf_pgfree += P_FREESPACE(h); + sp->bt_leaf_pgfree += P_FREESPACE(dbp, h); break; case P_LRECNO: /* @@ -356,39 +352,39 @@ __bam_stat_callback(dbp, h, cookie, putp) * Correct for deleted items in non-renumbering * Recno databases. */ - if (F_ISSET(dbp, DB_RE_RENUMBER)) + if (F_ISSET(dbp, DB_AM_RENUMBER)) sp->bt_ndata += top; else for (indx = 0; indx < top; indx += O_INDX) { - type = GET_BKEYDATA(h, indx)->type; + type = GET_BKEYDATA(dbp, h, indx)->type; if (!B_DISSET(type)) ++sp->bt_ndata; } ++sp->bt_leaf_pg; - sp->bt_leaf_pgfree += P_FREESPACE(h); + sp->bt_leaf_pgfree += P_FREESPACE(dbp, h); } else { sp->bt_ndata += top; ++sp->bt_dup_pg; - sp->bt_dup_pgfree += P_FREESPACE(h); + sp->bt_dup_pgfree += P_FREESPACE(dbp, h); } break; case P_LDUP: /* Correct for deleted items. */ for (indx = 0; indx < top; indx += O_INDX) - if (!B_DISSET(GET_BKEYDATA(h, indx)->type)) + if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type)) ++sp->bt_ndata; ++sp->bt_dup_pg; - sp->bt_dup_pgfree += P_FREESPACE(h); + sp->bt_dup_pgfree += P_FREESPACE(dbp, h); break; case P_OVERFLOW: ++sp->bt_over_pg; - sp->bt_over_pgfree += P_OVFLSPACE(dbp->pgsize, h); + sp->bt_over_pgfree += P_OVFLSPACE(dbp, dbp->pgsize, h); break; default: - return (__db_pgfmt(dbp, h->pgno)); + return (__db_pgfmt(dbp->dbenv, h->pgno)); } return (0); } @@ -421,13 +417,18 @@ __bam_key_range(dbp, txn, dbt, kp, flags) if (flags != 0) return (__db_ferr(dbp->dbenv, "DB->key_range", 0)); + /* Check for consistent transaction usage. 
*/ + if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0) + return (ret); + /* Acquire a cursor. */ if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) return (ret); DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0); - if ((ret = __bam_search(dbc, dbt, S_STK_ONLY, 1, NULL, &exact)) != 0) + if ((ret = __bam_search(dbc, PGNO_INVALID, + dbt, S_STK_ONLY, 1, NULL, &exact)) != 0) goto err; cp = (BTREE_CURSOR *)dbc->internal; diff --git a/bdb/btree/bt_upgrade.c b/bdb/btree/bt_upgrade.c index 4032dba3b36..9f92648d739 100644 --- a/bdb/btree/bt_upgrade.c +++ b/bdb/btree/bt_upgrade.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_upgrade.c,v 11.19 2000/11/30 00:58:29 ubell Exp $"; +static const char revid[] = "$Id: bt_upgrade.c,v 11.25 2002/08/06 06:11:13 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,11 +18,9 @@ static const char revid[] = "$Id: bt_upgrade.c,v 11.19 2000/11/30 00:58:29 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "btree.h" -#include "db_am.h" -#include "db_upgrade.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/db_upgrade.h" /* * __bam_30_btreemeta -- @@ -107,7 +105,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp) newmeta->minkey = oldmeta->minkey; newmeta->maxkey = oldmeta->maxkey; memmove(newmeta->dbmeta.uid, - oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); + oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); newmeta->dbmeta.flags = oldmeta->dbmeta.flags; newmeta->dbmeta.record_count = 0; newmeta->dbmeta.key_count = 0; @@ -126,7 +124,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp) /* * __bam_31_lbtree -- - * Upgrade the database btree leaf pages. + * Upgrade the database btree leaf pages. 
* * PUBLIC: int __bam_31_lbtree * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); @@ -147,15 +145,15 @@ __bam_31_lbtree(dbp, real_name, flags, fhp, h, dirtyp) ret = 0; for (indx = O_INDX; indx < NUM_ENT(h); indx += P_INDX) { - bk = GET_BKEYDATA(h, indx); + bk = GET_BKEYDATA(dbp, h, indx); if (B_TYPE(bk->type) == B_DUPLICATE) { - pgno = GET_BOVERFLOW(h, indx)->pgno; + pgno = GET_BOVERFLOW(dbp, h, indx)->pgno; if ((ret = __db_31_offdup(dbp, real_name, fhp, LF_ISSET(DB_DUPSORT) ? 1 : 0, &pgno)) != 0) break; - if (pgno != GET_BOVERFLOW(h, indx)->pgno) { + if (pgno != GET_BOVERFLOW(dbp, h, indx)->pgno) { *dirtyp = 1; - GET_BOVERFLOW(h, indx)->pgno = pgno; + GET_BOVERFLOW(dbp, h, indx)->pgno = pgno; } } } diff --git a/bdb/btree/bt_verify.c b/bdb/btree/bt_verify.c index 9f8647e7e2a..0cf8a47e476 100644 --- a/bdb/btree/bt_verify.c +++ b/bdb/btree/bt_verify.c @@ -1,16 +1,16 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2002 * Sleepycat Software. All rights reserved. * - * $Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $ + * $Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $ */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $"; +static const char revid[] = "$Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,9 +20,9 @@ static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell E #endif #include "db_int.h" -#include "db_page.h" -#include "db_verify.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/btree.h" static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *)); static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, @@ -79,15 +79,15 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) /* avoid division by zero */ ovflsize = meta->minkey > 0 ? 
- B_MINKEY_TO_OVFLSIZE(meta->minkey, dbp->pgsize) : 0; + B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0; if (meta->minkey < 2 || - ovflsize > B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) { + ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) { pip->bt_minkey = 0; isbad = 1; EPRINT((dbp->dbenv, - "Nonsensical bt_minkey value %lu on metadata page %lu", - (u_long)meta->minkey, (u_long)pgno)); + "Page %lu: nonsensical bt_minkey value %lu on metadata page", + (u_long)pgno, (u_long)meta->minkey)); } else pip->bt_minkey = meta->minkey; @@ -103,13 +103,13 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) * of the file, then the root page had better be page 1. */ pip->root = 0; - if (meta->root == PGNO_INVALID - || meta->root == pgno || !IS_VALID_PGNO(meta->root) || + if (meta->root == PGNO_INVALID || + meta->root == pgno || !IS_VALID_PGNO(meta->root) || (pgno == PGNO_BASE_MD && meta->root != 1)) { isbad = 1; EPRINT((dbp->dbenv, - "Nonsensical root page %lu on metadata page %lu", - (u_long)meta->root, (u_long)vdp->last_pgno)); + "Page %lu: nonsensical root page %lu on metadata page", + (u_long)pgno, (u_long)meta->root)); } else pip->root = meta->root; @@ -125,7 +125,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) { isbad = 1; EPRINT((dbp->dbenv, - "Btree metadata page %lu has both duplicates and multiple databases", +"Page %lu: Btree metadata page has both duplicates and multiple databases", (u_long)pgno)); } F_SET(pip, VRFY_HAS_SUBDBS); @@ -139,7 +139,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) F_SET(pip, VRFY_HAS_RECNUMS); if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) { EPRINT((dbp->dbenv, - "Btree metadata page %lu illegally has both recnums and dups", + "Page %lu: Btree metadata page illegally has both recnums and dups", (u_long)pgno)); isbad = 1; } @@ -150,13 +150,13 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) } else if (F_ISSET(pip, 
VRFY_IS_RRECNO)) { isbad = 1; EPRINT((dbp->dbenv, - "Metadata page %lu has renumber flag set but is not recno", + "Page %lu: metadata page has renumber flag set but is not recno", (u_long)pgno)); } if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) { EPRINT((dbp->dbenv, - "Recno metadata page %lu specifies duplicates", + "Page %lu: recno metadata page specifies duplicates", (u_long)pgno)); isbad = 1; } @@ -170,8 +170,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) */ isbad = 1; EPRINT((dbp->dbenv, - "re_len of %lu in non-fixed-length database", - (u_long)pip->re_len)); + "Page %lu: re_len of %lu in non-fixed-length database", + (u_long)pgno, (u_long)pip->re_len)); } /* @@ -179,7 +179,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) * not be and may still be correct. */ -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } @@ -242,7 +243,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) if (F_ISSET(pip, VRFY_HAS_DUPS)) { EPRINT((dbp->dbenv, - "Recno database has dups on page %lu", (u_long)pgno)); + "Page %lu: Recno database has dups", (u_long)pgno)); ret = DB_VERIFY_BAD; goto err; } @@ -255,7 +256,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) */ re_len_guess = 0; for (i = 0; i < NUM_ENT(h); i++) { - bk = GET_BKEYDATA(h, i); + bk = GET_BKEYDATA(dbp, h, i); /* KEYEMPTY. Go on. */ if (B_DISSET(bk->type)) continue; @@ -266,8 +267,8 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) else { isbad = 1; EPRINT((dbp->dbenv, - "Nonsensical type for item %lu, page %lu", - (u_long)i, (u_long)pgno)); + "Page %lu: nonsensical type for item %lu", + (u_long)pgno, (u_long)i)); continue; } if (re_len_guess == 0) @@ -288,9 +289,10 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) /* Save off record count. 
*/ pip->rec_cnt = NUM_ENT(h); -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0); + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } /* @@ -362,7 +364,7 @@ __bam_vrfy(dbp, vdp, h, pgno, flags) else goto err; EPRINT((dbp->dbenv, - "item order check on page %lu unsafe: skipping", + "Page %lu: item order check unsafe: skipping", (u_long)pgno)); } else if (!LF_ISSET(DB_NOORDERCHK) && (ret = __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) { @@ -377,9 +379,10 @@ __bam_vrfy(dbp, vdp, h, pgno, flags) goto err; } -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0); + return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } /* @@ -403,6 +406,7 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) VRFY_PAGEINFO *pip; int ret, t_ret, isbad; u_int32_t himark, i, offset, nentries; + db_indx_t *inp; u_int8_t *pagelayout, *p; isbad = 0; @@ -422,30 +426,31 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) himark = dbp->pgsize; if ((ret = - __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pagelayout)) != 0) + __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0) goto err; memset(pagelayout, 0, dbp->pgsize); + inp = P_INP(dbp, h); for (i = 0; i < NUM_ENT(h); i++) { - if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + himark) { + if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) { EPRINT((dbp->dbenv, - "Page %lu entries listing %lu overlaps data", + "Page %lu: entries listing %lu overlaps data", (u_long)pgno, (u_long)i)); ret = DB_VERIFY_BAD; goto err; } - offset = h->inp[i]; + offset = inp[i]; /* * Check that the item offset is reasonable: it points * somewhere after the inp array and before the end of the * page. 
*/ - if (offset <= (u_int32_t)((u_int8_t *)h->inp + i - + if (offset <= (u_int32_t)((u_int8_t *)inp + i - (u_int8_t *)h) || offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) { isbad = 1; EPRINT((dbp->dbenv, - "Bad offset %lu at page %lu index %lu", - (u_long)offset, (u_long)pgno, (u_long)i)); + "Page %lu: bad offset %lu at index %lu", + (u_long)pgno, (u_long)offset, (u_long)i)); continue; } @@ -456,7 +461,7 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) nentries++; /* Make sure this RINTERNAL is not multiply referenced. */ - ri = GET_RINTERNAL(h, i); + ri = GET_RINTERNAL(dbp, h, i); if (pagelayout[offset] == 0) { pagelayout[offset] = 1; child.pgno = ri->pgno; @@ -466,8 +471,8 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) goto err; } else { EPRINT((dbp->dbenv, - "RINTERNAL structure at offset %lu, page %lu referenced twice", - (u_long)offset, (u_long)pgno)); + "Page %lu: RINTERNAL structure at offset %lu referenced twice", + (u_long)pgno, (u_long)offset)); isbad = 1; } } @@ -477,23 +482,25 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) p += RINTERNAL_SIZE) if (*p != 1) { EPRINT((dbp->dbenv, - "Gap between items at offset %lu, page %lu", - (u_long)(p - pagelayout), (u_long)pgno)); + "Page %lu: gap between items at offset %lu", + (u_long)pgno, (u_long)(p - pagelayout))); isbad = 1; } if ((db_indx_t)himark != HOFFSET(h)) { - EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu", - (u_long)(HOFFSET(h)), (u_long)himark)); + EPRINT((dbp->dbenv, + "Page %lu: bad HOFFSET %lu, appears to be %lu", + (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark)); isbad = 1; } *nentriesp = nentries; -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) +err: if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; if (pagelayout != NULL) - __os_free(pagelayout, dbp->pgsize); + __os_free(dbp->dbenv, pagelayout); return ((ret == 0 && isbad == 1) ? 
DB_VERIFY_BAD : ret); } @@ -558,22 +565,24 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) * it and the region immediately after it. */ himark = dbp->pgsize; - if ((ret = __os_malloc(dbp->dbenv, - dbp->pgsize, NULL, &pagelayout)) != 0) + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0) goto err; memset(pagelayout, 0, dbp->pgsize); for (i = 0; i < NUM_ENT(h); i++) { - - ret = __db_vrfy_inpitem(dbp, - h, pgno, i, 1, flags, &himark, &offset); - if (ret == DB_VERIFY_BAD) { + switch (ret = __db_vrfy_inpitem(dbp, + h, pgno, i, 1, flags, &himark, &offset)) { + case 0: + break; + case DB_VERIFY_BAD: isbad = 1; continue; - } else if (ret == DB_VERIFY_FATAL) { + case DB_VERIFY_FATAL: isbad = 1; goto err; - } else if (ret != 0) - DB_ASSERT(0); + default: + DB_ASSERT(ret != 0); + break; + } /* * We now have a plausible beginning for the item, and we know @@ -582,7 +591,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) * Mark the beginning and end in pagelayout so we can make sure * items have no overlaps or gaps. 
*/ - bk = GET_BKEYDATA(h, i); + bk = GET_BKEYDATA(dbp, h, i); #define ITEM_BEGIN 1 #define ITEM_END 2 if (pagelayout[offset] == 0) @@ -609,8 +618,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) } else { isbad = 1; EPRINT((dbp->dbenv, - "Duplicated item %lu on page %lu", - (u_long)i, (u_long)pgno)); + "Page %lu: duplicated item %lu", + (u_long)pgno, (u_long)i)); } } @@ -662,8 +671,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) */ if (isdupitem && pagelayout[endoff] != ITEM_END) { EPRINT((dbp->dbenv, - "Duplicated item %lu on page %lu", - (u_long)i, (u_long)pgno)); + "Page %lu: duplicated item %lu", + (u_long)pgno, (u_long)i)); isbad = 1; } else if (pagelayout[endoff] == 0) pagelayout[endoff] = ITEM_END; @@ -676,8 +685,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) { isbad = 1; EPRINT((dbp->dbenv, - "Item %lu on page %lu marked deleted", - (u_long)i, (u_long)pgno)); + "Page %lu: item %lu marked deleted", + (u_long)pgno, (u_long)i)); } /* @@ -696,13 +705,13 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) if (TYPE(h) == P_IBTREE) { isbad = 1; EPRINT((dbp->dbenv, - "Duplicate page referenced by internal btree page %lu at item %lu", + "Page %lu: duplicate page referenced by internal btree page at item %lu", (u_long)pgno, (u_long)i)); break; } else if (TYPE(h) == P_LRECNO) { isbad = 1; EPRINT((dbp->dbenv, - "Duplicate page referenced by recno page %lu at item %lu", + "Page %lu: duplicate page referenced by recno page at item %lu", (u_long)pgno, (u_long)i)); break; } @@ -717,9 +726,9 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) if (bo->tlen > dbp->pgsize * vdp->last_pgno) { isbad = 1; EPRINT((dbp->dbenv, - "Impossible tlen %lu, item %lu, page %lu", - (u_long)bo->tlen, (u_long)i, - (u_long)pgno)); + "Page %lu: impossible tlen %lu, item %lu", + (u_long)pgno, + (u_long)bo->tlen, (u_long)i)); /* Don't save as a child. 
*/ break; } @@ -728,8 +737,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) bo->pgno == PGNO_INVALID) { isbad = 1; EPRINT((dbp->dbenv, - "Offpage item %lu, page %lu has bad pgno", - (u_long)i, (u_long)pgno)); + "Page %lu: offpage item %lu has bad pgno %lu", + (u_long)pgno, (u_long)i, (u_long)bo->pgno)); /* Don't save as a child. */ break; } @@ -744,8 +753,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) default: isbad = 1; EPRINT((dbp->dbenv, - "Item %lu on page %lu of invalid type %lu", - (u_long)i, (u_long)pgno)); + "Page %lu: item %lu of invalid type %lu", + (u_long)pgno, (u_long)i)); break; } } @@ -765,7 +774,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) isbad = 1; EPRINT((dbp->dbenv, - "Gap between items, page %lu offset %lu", + "Page %lu: gap between items at offset %lu", (u_long)pgno, (u_long)i)); /* Find the end of the gap */ for ( ; pagelayout[i + 1] == 0 && @@ -777,8 +786,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) if (i != ALIGN(i, sizeof(u_int32_t))) { isbad = 1; EPRINT((dbp->dbenv, - "Offset %lu page %lu unaligned", - (u_long)i, (u_long)pgno)); + "Page %lu: offset %lu unaligned", + (u_long)pgno, (u_long)i)); } initem = 1; nentries++; @@ -791,7 +800,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) */ isbad = 1; EPRINT((dbp->dbenv, - "Overlapping items, page %lu offset %lu", + "Page %lu: overlapping items at offset %lu", (u_long)pgno, (u_long)i)); break; default: @@ -816,24 +825,26 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) */ isbad = 1; EPRINT((dbp->dbenv, - "Overlapping items, page %lu offset %lu", + "Page %lu: overlapping items at offset %lu", (u_long)pgno, (u_long)i)); break; } - (void)__os_free(pagelayout, dbp->pgsize); + (void)__os_free(dbp->dbenv, pagelayout); /* Verify HOFFSET. 
*/ if ((db_indx_t)himark != HOFFSET(h)) { - EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu", - (u_long)HOFFSET(h), (u_long)himark)); + EPRINT((dbp->dbenv, + "Page %lu: bad HOFFSET %lu, appears to be %lu", + (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark)); isbad = 1; } err: if (nentriesp != NULL) *nentriesp = nentries; - if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); @@ -865,14 +876,14 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags) int ovflok, hasdups; u_int32_t flags; { - DBT dbta, dbtb, dup1, dup2, *p1, *p2, *tmp; + DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp; BTREE *bt; BINTERNAL *bi; BKEYDATA *bk; BOVERFLOW *bo; VRFY_PAGEINFO *pip; db_indx_t i; - int cmp, freedup1, freedup2, isbad, ret, t_ret; + int cmp, freedup_1, freedup_2, isbad, ret, t_ret; int (*dupfunc) __P((DB *, const DBT *, const DBT *)); int (*func) __P((DB *, const DBT *, const DBT *)); void *buf1, *buf2, *tmpbuf; @@ -949,7 +960,7 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags) */ switch (TYPE(h)) { case P_IBTREE: - bi = GET_BINTERNAL(h, i); + bi = GET_BINTERNAL(dbp, h, i); if (B_TYPE(bi->type) == B_OVERFLOW) { bo = (BOVERFLOW *)(bi->data); goto overflow; @@ -972,14 +983,14 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags) if (i == 0 && bi->len != 0) { isbad = 1; EPRINT((dbp->dbenv, - "Lowest key on internal page %lu of nonzero length", + "Page %lu: lowest key on internal page of nonzero length", (u_long)pgno)); } #endif break; case P_LBTREE: case P_LDUP: - bk = GET_BKEYDATA(h, i); + bk = GET_BKEYDATA(dbp, h, i); if (B_TYPE(bk->type) == B_OVERFLOW) { bo = (BOVERFLOW *)bk; goto overflow; @@ -1030,8 +1041,8 @@ overflow: if (!ovflok) { p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) { isbad = 1; EPRINT((dbp->dbenv, - "Error %lu in fetching overflow 
item %lu, page %lu", - (u_long)ret, (u_long)i, (u_long)pgno)); + "Page %lu: error %lu in fetching overflow item %lu", + (u_long)pgno, (u_long)ret, (u_long)i)); } /* In case it got realloc'ed and thus changed. */ buf2 = p2->data; @@ -1045,7 +1056,7 @@ overflow: if (!ovflok) { if (cmp > 0) { isbad = 1; EPRINT((dbp->dbenv, - "Out-of-order key, page %lu item %lu", + "Page %lu: out-of-order key at entry %lu", (u_long)pgno, (u_long)i)); /* proceed */ } else if (cmp == 0) { @@ -1060,7 +1071,7 @@ overflow: if (!ovflok) { else if (hasdups == 0) { isbad = 1; EPRINT((dbp->dbenv, - "Database with no duplicates has duplicated keys on page %lu", + "Page %lu: database with no duplicates has duplicated keys", (u_long)pgno)); } @@ -1092,11 +1103,11 @@ overflow: if (!ovflok) { * dups are probably (?) rare. */ if (((ret = __bam_safe_getdata(dbp, - h, i - 1, ovflok, &dup1, - &freedup1)) != 0) || + h, i - 1, ovflok, &dup_1, + &freedup_1)) != 0) || ((ret = __bam_safe_getdata(dbp, - h, i + 1, ovflok, &dup2, - &freedup2)) != 0)) + h, i + 1, ovflok, &dup_2, + &freedup_2)) != 0)) goto err; /* @@ -1105,8 +1116,8 @@ overflow: if (!ovflok) { * it's not safe to chase them now. * Mark an incomplete and return. */ - if (dup1.data == NULL || - dup2.data == NULL) { + if (dup_1.data == NULL || + dup_2.data == NULL) { DB_ASSERT(!ovflok); F_SET(pip, VRFY_INCOMPLETE); goto err; @@ -1118,26 +1129,28 @@ overflow: if (!ovflok) { * until we do the structure check * and see whether DUPSORT is set. 
*/ - if (dupfunc(dbp, &dup1, &dup2) > 0) + if (dupfunc(dbp, &dup_1, &dup_2) > 0) F_SET(pip, VRFY_DUPS_UNSORTED); - if (freedup1) - __os_free(dup1.data, 0); - if (freedup2) - __os_free(dup2.data, 0); + if (freedup_1) + __os_ufree(dbp->dbenv, + dup_1.data); + if (freedup_2) + __os_ufree(dbp->dbenv, + dup_2.data); } } } } -err: if (pip != NULL && - ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && ret == 0) +err: if (pip != NULL && ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0) ret = t_ret; if (buf1 != NULL) - __os_free(buf1, 0); + __os_ufree(dbp->dbenv, buf1); if (buf2 != NULL) - __os_free(buf2, 0); + __os_ufree(dbp->dbenv, buf2); return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); } @@ -1173,7 +1186,7 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) goto err; if (p != 0) { EPRINT((dbp->dbenv, - "Btree metadata page number %lu observed twice", + "Page %lu: btree metadata page observed twice", (u_long)meta_pgno)); ret = DB_VERIFY_BAD; goto err; @@ -1185,7 +1198,8 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) if (root == 0) { EPRINT((dbp->dbenv, - "Btree metadata page %lu has no root", (u_long)meta_pgno)); + "Page %lu: btree metadata page has no root", + (u_long)meta_pgno)); ret = DB_VERIFY_BAD; goto err; } @@ -1222,7 +1236,7 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) */ if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) { EPRINT((dbp->dbenv, - "Recno database with meta page %lu has bad re_len %lu", + "Page %lu: recno database has bad re_len %lu", (u_long)meta_pgno, (u_long)relen)); ret = DB_VERIFY_BAD; goto err; @@ -1231,24 +1245,24 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) break; case P_LDUP: EPRINT((dbp->dbenv, - "Duplicate tree referenced from metadata page %lu", + "Page %lu: duplicate tree referenced from metadata page", (u_long)meta_pgno)); ret = DB_VERIFY_BAD; break; default: EPRINT((dbp->dbenv, - "Btree root of incorrect type %lu on meta page %lu", - (u_long)rip->type, (u_long)meta_pgno)); + 
"Page %lu: btree root of incorrect type %lu on metadata page", + (u_long)meta_pgno, (u_long)rip->type)); ret = DB_VERIFY_BAD; break; } -err: if (mip != NULL && - ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && ret == 0) - t_ret = ret; - if (rip != NULL && - ((t_ret = __db_vrfy_putpageinfo(vdp, rip)) != 0) && ret == 0) - t_ret = ret; +err: if (mip != NULL && ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0) + ret = t_ret; + if (rip != NULL && ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, rip)) != 0) && ret == 0) + ret = t_ret; return (ret); } @@ -1273,20 +1287,26 @@ __bam_vrfy_subtree(dbp, { BINTERNAL *li, *ri, *lp, *rp; DB *pgset; + DB_MPOOLFILE *mpf; DBC *cc; PAGE *h; VRFY_CHILDINFO *child; VRFY_PAGEINFO *pip; - db_recno_t nrecs, child_nrecs; db_indx_t i; - int ret, t_ret, isbad, toplevel, p; + db_pgno_t next_pgno, prev_pgno; + db_recno_t child_nrecs, nrecs; + u_int32_t child_level, child_relen, level, relen, stflags; + u_int8_t leaf_type; int (*func) __P((DB *, const DBT *, const DBT *)); - u_int32_t level, child_level, stflags, child_relen, relen; + int isbad, p, ret, t_ret, toplevel; + mpf = dbp->mpf; ret = isbad = 0; nrecs = 0; h = NULL; relen = 0; + leaf_type = P_INVALID; + next_pgno = prev_pgno = PGNO_INVALID; rp = (BINTERNAL *)r; lp = (BINTERNAL *)l; @@ -1300,10 +1320,33 @@ __bam_vrfy_subtree(dbp, cc = NULL; level = pip->bt_level; - toplevel = LF_ISSET(ST_TOPLEVEL); + toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0; LF_CLR(ST_TOPLEVEL); /* + * If this is the root, initialize the vdp's prev- and next-pgno + * accounting. + * + * For each leaf page we hit, we'll want to make sure that + * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is + * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno + * and vdp->prev_pgno to our page number, and the next leaf page in + * line should be able to do the same verification. 
+ */ + if (toplevel) { + /* + * Cache the values stored in the vdp so that if we're an + * auxiliary tree such as an off-page duplicate set, our + * caller's leaf page chain doesn't get lost. + */ + prev_pgno = vdp->prev_pgno; + next_pgno = vdp->next_pgno; + leaf_type = vdp->leaf_type; + vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID; + vdp->leaf_type = P_INVALID; + } + + /* * We are recursively descending a btree, starting from the root * and working our way out to the leaves. * @@ -1333,8 +1376,53 @@ __bam_vrfy_subtree(dbp, case P_LDUP: case P_LBTREE: /* - * Cases 1, 2 and 3 (overflow pages are common to all three); - * traverse child list, looking for overflows. + * Cases 1, 2 and 3. + * + * We're some sort of leaf page; verify + * that our linked list of leaves is consistent. + */ + if (vdp->leaf_type == P_INVALID) { + /* + * First leaf page. Set the type that all its + * successors should be, and verify that our prev_pgno + * is PGNO_INVALID. + */ + vdp->leaf_type = pip->type; + if (pip->prev_pgno != PGNO_INVALID) + goto bad_prev; + } else { + /* + * Successor leaf page. Check our type, the previous + * page's next_pgno, and our prev_pgno. 
+ */ + if (pip->type != vdp->leaf_type) { + EPRINT((dbp->dbenv, + "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)", + (u_long)pip->pgno, (u_long)pip->type, + (u_long)vdp->leaf_type)); + isbad = 1; + } + if (pip->pgno != vdp->next_pgno) { + EPRINT((dbp->dbenv, + "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)", + (u_long)vdp->prev_pgno, + (u_long)vdp->next_pgno, (u_long)pip->pgno)); + isbad = 1; + } + if (pip->prev_pgno != vdp->prev_pgno) { +bad_prev: EPRINT((dbp->dbenv, + "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)", + (u_long)pip->pgno, (u_long)pip->prev_pgno, + (u_long)vdp->prev_pgno)); + isbad = 1; + } + } + vdp->prev_pgno = pip->pgno; + vdp->next_pgno = pip->next_pgno; + + /* + * Overflow pages are common to all three leaf types; + * traverse the child list, looking for overflows. */ if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) goto err; @@ -1360,7 +1448,7 @@ __bam_vrfy_subtree(dbp, !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) { isbad = 1; EPRINT((dbp->dbenv, - "Recno leaf page %lu in non-recno tree", + "Page %lu: recno leaf page non-recno tree", (u_long)pgno)); goto done; } @@ -1372,7 +1460,7 @@ __bam_vrfy_subtree(dbp, */ isbad = 1; EPRINT((dbp->dbenv, - "Non-recno leaf page %lu in recno tree", + "Page %lu: non-recno leaf page in recno tree", (u_long)pgno)); goto done; } @@ -1389,7 +1477,7 @@ __bam_vrfy_subtree(dbp, if (!LF_ISSET(ST_DUPOK)) { isbad = 1; EPRINT((dbp->dbenv, - "Duplicates on page %lu in non-dup btree", + "Page %lu: duplicates in non-dup btree", (u_long)pgno)); } else { /* @@ -1414,8 +1502,8 @@ __bam_vrfy_subtree(dbp, } if ((ret = __bam_vrfy_subtree( dbp, vdp, child->pgno, NULL, - NULL, stflags, NULL, NULL, - NULL)) != 0) { + NULL, stflags | ST_TOPLEVEL, + NULL, NULL, NULL)) != 0) { if (ret != DB_VERIFY_BAD) goto err; @@ -1436,14 +1524,13 @@ __bam_vrfy_subtree(dbp, if (F_ISSET(pip, VRFY_DUPS_UNSORTED) && LF_ISSET(ST_DUPSORT)) { EPRINT((dbp->dbenv, - "Unsorted 
duplicate set at page %lu in sorted-dup database", + "Page %lu: unsorted duplicate set in sorted-dup database", (u_long)pgno)); isbad = 1; } } } goto leaf; - break; case P_IBTREE: case P_IRECNO: /* We handle these below. */ @@ -1455,10 +1542,18 @@ __bam_vrfy_subtree(dbp, * Note that the code at the "done" label assumes that the * current page is a btree/recno one of some sort; this * is not the case here, so we goto err. + * + * If the page is entirely zeroed, its pip->type will be a lie + * (we assumed it was a hash page, as they're allowed to be + * zeroed); handle this case specially. */ - EPRINT((dbp->dbenv, - "Page %lu is of inappropriate type %lu", - (u_long)pgno, (u_long)pip->type)); + if (F_ISSET(pip, VRFY_IS_ALLZEROES)) + ZEROPG_ERR_PRINT(dbp->dbenv, + pgno, "btree or recno page"); + else + EPRINT((dbp->dbenv, + "Page %lu: btree or recno page is of inappropriate type %lu", + (u_long)pgno, (u_long)pip->type)); ret = DB_VERIFY_BAD; goto err; } @@ -1500,8 +1595,9 @@ __bam_vrfy_subtree(dbp, relen != child_relen) { isbad = 1; EPRINT((dbp->dbenv, - "Recno page %lu returned bad re_len", - (u_long)child->pgno)); + "Page %lu: recno page returned bad re_len %lu", + (u_long)child->pgno, + (u_long)child_relen)); } if (relenp) *relenp = relen; @@ -1510,10 +1606,8 @@ __bam_vrfy_subtree(dbp, nrecs += child_nrecs; if (level != child_level + 1) { isbad = 1; - EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu", - "Recno level incorrect on page ", - (u_long)child->pgno, ": got ", - (u_long)child_level, ", expected ", + EPRINT((dbp->dbenv, "Page %lu: recno level incorrect: got %lu, expected %lu", + (u_long)child->pgno, (u_long)child_level, (u_long)(level - 1))); } } else if (child->type == V_OVERFLOW && @@ -1543,12 +1637,12 @@ __bam_vrfy_subtree(dbp, * itself, which must sort lower than all entries on its child; * ri will be the key to its right, which must sort greater. 
*/ - if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; for (i = 0; i < pip->entries; i += O_INDX) { - li = GET_BINTERNAL(h, i); + li = GET_BINTERNAL(dbp, h, i); ri = (i + O_INDX < pip->entries) ? - GET_BINTERNAL(h, i + O_INDX) : NULL; + GET_BINTERNAL(dbp, h, i + O_INDX) : NULL; /* * The leftmost key is forcibly sorted less than all entries, @@ -1578,18 +1672,18 @@ __bam_vrfy_subtree(dbp, if (li->nrecs != child_nrecs) { isbad = 1; EPRINT((dbp->dbenv, - "Item %lu page %lu has incorrect record count of %lu, should be %lu", - (u_long)i, (u_long)pgno, (u_long)li->nrecs, + "Page %lu: item %lu has incorrect record count of %lu, should be %lu", + (u_long)pgno, (u_long)i, (u_long)li->nrecs, (u_long)child_nrecs)); } } if (level != child_level + 1) { isbad = 1; - EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu", - "Btree level incorrect on page ", (u_long)li->pgno, - ": got ", (u_long)child_level, ", expected ", - (u_long)(level - 1))); + EPRINT((dbp->dbenv, + "Page %lu: Btree level incorrect: got %lu, expected %lu", + (u_long)li->pgno, + (u_long)child_level, (u_long)(level - 1))); } } @@ -1616,7 +1710,7 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { * isbad == 0, though, it's now safe to do so, as we've * traversed any child overflow pages. Do it. */ - if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; if ((ret = __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 1, 0, flags)) != 0) @@ -1625,12 +1719,35 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { } /* + * It's possible to get to this point with a page that has no + * items, but without having detected any sort of failure yet. 
+ * Having zero items is legal if it's a leaf--it may be the + * root page in an empty tree, or the tree may have been + * modified with the DB_REVSPLITOFF flag set (there's no way + * to tell from what's on disk). For an internal page, + * though, having no items is a problem (all internal pages + * must have children). + */ + if (isbad == 0 && ret == 0) { + if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) + goto err; + + if (NUM_ENT(h) == 0 && ISINTERNAL(h)) { + EPRINT((dbp->dbenv, + "Page %lu: internal page is empty and should not be", + (u_long)pgno)); + isbad = 1; + goto err; + } + } + + /* * Our parent has sent us BINTERNAL pointers to parent records * so that we can verify our place with respect to them. If it's * appropriate--we have a default sort function--verify this. */ if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) { - if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; /* @@ -1662,7 +1779,7 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) { isbad = 1; EPRINT((dbp->dbenv, - "Bad record count on page %lu: got %lu, expected %lu", + "Page %lu: bad record count: has %lu records, claims %lu", (u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt)); } @@ -1676,13 +1793,32 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { goto err; if (p != 0) { isbad = 1; - EPRINT((dbp->dbenv, "Page %lu linked twice", (u_long)pgno)); + EPRINT((dbp->dbenv, "Page %lu: linked twice", (u_long)pgno)); } else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0) goto err; -err: if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0) + if (toplevel) + /* + * The last page's next_pgno in the leaf chain should have been + * PGNO_INVALID. 
+ */ + if (vdp->next_pgno != PGNO_INVALID) { + EPRINT((dbp->dbenv, "Page %lu: unterminated leaf chain", + (u_long)vdp->prev_pgno)); + isbad = 1; + } + +err: if (toplevel) { + /* Restore our caller's settings. */ + vdp->next_pgno = next_pgno; + vdp->prev_pgno = prev_pgno; + vdp->leaf_type = leaf_type; + } + + if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0) ret = t_ret; - if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + if ((t_ret = + __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) ret = t_ret; @@ -1720,6 +1856,14 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) F_SET(&dbt, DB_DBT_MALLOC); ret = 0; + /* + * Empty pages are sorted correctly by definition. We check + * to see whether they ought to be empty elsewhere; leaf + * pages legally may be. + */ + if (NUM_ENT(h) == 0) + return (0); + switch (TYPE(h)) { case P_IBTREE: case P_LDUP: @@ -1760,7 +1904,8 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) } else { DB_ASSERT(0); EPRINT((dbp->dbenv, - "Unknown type for internal record")); + "Page %lu: unknown type for internal record", + (u_long)PGNO(h))); return (EINVAL); } @@ -1768,17 +1913,17 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) { if (cmp > 0) { EPRINT((dbp->dbenv, - "First item on page %lu sorted greater than parent entry", + "Page %lu: first item on page sorted greater than parent entry", (u_long)PGNO(h))); ret = DB_VERIFY_BAD; } } else EPRINT((dbp->dbenv, - "First item on page %lu had comparison error", + "Page %lu: first item on page had comparison error", (u_long)PGNO(h))); if (dbt.data != lp->data) - __os_free(dbt.data, 0); + __os_ufree(dbp->dbenv, dbt.data); if (ret != 0) return (ret); } @@ -1795,7 +1940,8 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) } else { DB_ASSERT(0); EPRINT((dbp->dbenv, - "Unknown type for internal 
record")); + "Page %lu: unknown type for internal record", + (u_long)PGNO(h))); return (EINVAL); } @@ -1803,17 +1949,17 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) { if (cmp < 0) { EPRINT((dbp->dbenv, - "Last item on page %lu sorted greater than parent entry", + "Page %lu: last item on page sorted greater than parent entry", (u_long)PGNO(h))); ret = DB_VERIFY_BAD; } } else EPRINT((dbp->dbenv, - "Last item on page %lu had comparison error", + "Page %lu: last item on page had comparison error", (u_long)PGNO(h))); if (dbt.data != rp->data) - __os_free(dbt.data, 0); + __os_ufree(dbp->dbenv, dbt.data); } return (ret); @@ -1843,7 +1989,7 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) DBT dbt, unkdbt; BKEYDATA *bk; BOVERFLOW *bo; - db_indx_t i, beg, end; + db_indx_t i, beg, end, *inp; u_int32_t himark; u_int8_t *pgmap; void *ovflbuf; @@ -1854,24 +2000,25 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) ovflbuf = pgmap = NULL; err_ret = ret = 0; + inp = P_INP(dbp, h); memset(&dbt, 0, sizeof(DBT)); dbt.flags = DB_DBT_REALLOC; memset(&unkdbt, 0, sizeof(DBT)); - unkdbt.size = strlen("UNKNOWN") + 1; + unkdbt.size = (u_int32_t)(strlen("UNKNOWN") + 1); unkdbt.data = "UNKNOWN"; /* * Allocate a buffer for overflow items. Start at one page; * __db_safe_goff will realloc as needed. */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &ovflbuf)) != 0) + if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &ovflbuf)) != 0) return (ret); if (LF_ISSET(DB_AGGRESSIVE)) { if ((ret = - __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pgmap)) != 0) + __os_malloc(dbp->dbenv, dbp->pgsize, &pgmap)) != 0) goto err; memset(pgmap, 0, dbp->pgsize); } @@ -1914,7 +2061,7 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) * We only want to print deleted items if * DB_AGGRESSIVE is set. 
*/ - bk = GET_BKEYDATA(h, i); + bk = GET_BKEYDATA(dbp, h, i); if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type)) continue; @@ -1927,10 +2074,10 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) if (key != NULL && (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY))) if ((ret = __db_prdbt(key, - 0, " ", handle, callback, 0, NULL)) != 0) + 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; - beg = h->inp[i]; + beg = inp[i]; switch (B_TYPE(bk->type)) { case B_DUPLICATE: end = beg + BOVERFLOW_SIZE - 1; @@ -1958,23 +2105,24 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) (i % P_INDX == 0)) { /* Not much to do on failure. */ if ((ret = __db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, NULL)) != 0) + handle, callback, 0, vdp)) != 0) err_ret = ret; break; } if ((ret = __db_salvage_duptree(dbp, vdp, bo->pgno, &dbt, handle, callback, - flags | SA_SKIPFIRSTKEY)) != 0) + flags | SA_SKIPFIRSTKEY)) != 0) err_ret = ret; break; case B_KEYDATA: - end = ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1; + end = + ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1; dbt.data = bk->data; dbt.size = bk->len; if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, NULL)) != 0) + 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; case B_OVERFLOW: @@ -1985,11 +2133,11 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) err_ret = ret; /* We care about err_ret more. */ (void)__db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, NULL); + handle, callback, 0, vdp); break; } if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, NULL)) != 0) + 0, " ", handle, callback, 0, vdp)) != 0) err_ret = ret; break; default: @@ -2020,12 +2168,12 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) * a datum; fix this imbalance by printing an "UNKNOWN". 
 */ if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret = - __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, NULL)) != 0)) + __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0)) err_ret = ret; err: if (pgmap != NULL) - __os_free(pgmap, 0); - __os_free(ovflbuf, 0); + __os_free(dbp->dbenv, pgmap); + __os_free(dbp->dbenv, ovflbuf); /* Mark this page as done. */ if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) @@ -2061,12 +2209,13 @@ __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags) for (i = 0; i < NUM_ENT(h); i++) { switch (TYPE(h)) { case P_IBTREE: - bi = GET_BINTERNAL(h, i); + bi = GET_BINTERNAL(dbp, h, i); if ((t_ret = __db_salvage_duptree(dbp, vdp, bi->pgno, key, handle, callback, flags)) != 0) ret = t_ret; + break; case P_IRECNO: - ri = GET_RINTERNAL(h, i); + ri = GET_RINTERNAL(dbp, h, i); if ((t_ret = __db_salvage_duptree(dbp, vdp, ri->pgno, key, handle, callback, flags)) != 0) ret = t_ret; @@ -2110,11 +2259,13 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) DB *pgset; { BINTERNAL *bi; + DB_MPOOLFILE *mpf; PAGE *h; RINTERNAL *ri; db_pgno_t current, p; int err_ret, ret; + mpf = dbp->mpf; h = NULL; ret = err_ret = 0; DB_ASSERT(pgset != NULL); @@ -2123,7 +2274,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) err_ret = DB_VERIFY_BAD; goto err; } - if ((ret = memp_fget(dbp->mpf, &current, 0, &h)) != 0) { + if ((ret = mpf->get(mpf, &current, 0, &h)) != 0) { err_ret = ret; goto err; } @@ -2137,10 +2288,10 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) goto err; } if (TYPE(h) == P_IBTREE) { - bi = GET_BINTERNAL(h, 0); + bi = GET_BINTERNAL(dbp, h, 0); current = bi->pgno; } else { /* P_IRECNO */ - ri = GET_RINTERNAL(h, 0); + ri = GET_RINTERNAL(dbp, h, 0); current = ri->pgno; } break; @@ -2152,7 +2303,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) goto err; } - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) err_ret = ret; h = NULL; } @@ -2163,8 +2314,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, 
pgset) */ traverse: while (IS_VALID_PGNO(current) && current != PGNO_INVALID) { - if (h == NULL && - (ret = memp_fget(dbp->mpf, &current, 0, &h) != 0)) { + if (h == NULL && (ret = mpf->get(mpf, &current, 0, &h)) != 0) { err_ret = ret; break; } @@ -2184,13 +2334,13 @@ traverse: goto err; current = NEXT_PGNO(h); - if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + if ((ret = mpf->put(mpf, h, 0)) != 0) err_ret = ret; h = NULL; } err: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); return (ret == 0 ? err_ret : ret); } @@ -2218,7 +2368,7 @@ __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp) memset(dbt, 0, sizeof(DBT)); *freedbtp = 0; - bk = GET_BKEYDATA(h, i); + bk = GET_BKEYDATA(dbp, h, i); if (B_TYPE(bk->type) == B_OVERFLOW) { if (!ovflok) return (0); diff --git a/bdb/btree/btree.src b/bdb/btree/btree.src index a1eba7d7fc7..73f4abac874 100644 --- a/bdb/btree/btree.src +++ b/bdb/btree/btree.src @@ -1,13 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. 
* - * $Id: btree.src,v 10.26 2000/12/12 17:40:23 bostic Exp $ + * $Id: btree.src,v 10.35 2002/04/17 19:02:56 krinsky Exp $ */ -PREFIX bam +PREFIX __bam +DBPRIVATE INCLUDE #include "db_config.h" INCLUDE @@ -15,69 +16,23 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES INCLUDE #include <sys/types.h> INCLUDE INCLUDE #include <ctype.h> -INCLUDE #include <errno.h> INCLUDE #include <string.h> INCLUDE #endif INCLUDE INCLUDE #include "db_int.h" -INCLUDE #include "db_page.h" -INCLUDE #include "db_dispatch.h" -INCLUDE #include "db_am.h" -INCLUDE #include "btree.h" -INCLUDE #include "txn.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/btree.h" +INCLUDE #include "dbinc/log.h" +INCLUDE #include "dbinc/rep.h" +INCLUDE #include "dbinc/txn.h" INCLUDE /* - * BTREE-pg_alloc: used to record allocating a new page. - * - * meta_lsn: the meta-data page's original lsn. - * page_lsn: the allocated page's original lsn. - * pgno: the page allocated. - * next: the next page on the free list. + * NOTE: pg_alloc and pg_free have been moved to db.src, where they belong. */ -BEGIN pg_alloc 51 -ARG fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -POINTER page_lsn DB_LSN * lu -ARG pgno db_pgno_t lu -ARG ptype u_int32_t lu -ARG next db_pgno_t lu -END - -DEPRECATED pg_alloc1 60 -ARG fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -POINTER alloc_lsn DB_LSN * lu -POINTER page_lsn DB_LSN * lu -ARG pgno db_pgno_t lu -ARG ptype u_int32_t lu -ARG next db_pgno_t lu -END - -/* - * BTREE-pg_free: used to record freeing a page. - * - * pgno: the page being freed. - * meta_lsn: the meta-data page's original lsn. - * header: the header from the free'd page. - * next: the previous next pointer on the metadata page. 
- */ -BEGIN pg_free 52 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -DBT header DBT s -ARG next db_pgno_t lu -END - -DEPRECATED pg_free1 61 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -POINTER alloc_lsn DB_LSN * lu -DBT header DBT s -ARG next db_pgno_t lu -END /* * BTREE-split: used to log a page split. @@ -89,46 +44,21 @@ END * indx: the number of entries that went to the left page. * npgno: the next page number * nlsn: the next page's original LSN (or 0 if no next page). - * pg: the split page's contents before the split. - */ -DEPRECATED split1 53 -ARG fileid int32_t ld -ARG left db_pgno_t lu -POINTER llsn DB_LSN * lu -ARG right db_pgno_t lu -POINTER rlsn DB_LSN * lu -ARG indx u_int32_t lu -ARG npgno db_pgno_t lu -POINTER nlsn DB_LSN * lu -DBT pg DBT s -END - -/* - * BTREE-split: used to log a page split. - * - * left: the page number for the low-order contents. - * llsn: the left page's original LSN. - * right: the page number for the high-order contents. - * rlsn: the right page's original LSN. - * indx: the number of entries that went to the left page. - * npgno: the next page number - * npgno: the next page number - * nlsn: the next page's original LSN (or 0 if no next page). * root_pgno: the root page number * pg: the split page's contents before the split. * opflags: SPL_NRECS: if splitting a tree that maintains a record count. */ BEGIN split 62 -ARG fileid int32_t ld -ARG left db_pgno_t lu +DB fileid int32_t ld +WRLOCK left db_pgno_t lu POINTER llsn DB_LSN * lu -ARG right db_pgno_t lu +WRLOCK right db_pgno_t lu POINTER rlsn DB_LSN * lu ARG indx u_int32_t lu ARG npgno db_pgno_t lu POINTER nlsn DB_LSN * lu -ARG root_pgno db_pgno_t lu -DBT pg DBT s +WRLOCKNZ root_pgno db_pgno_t lu +PGDBT pg DBT s ARG opflags u_int32_t lu END @@ -137,34 +67,16 @@ END * * pgno: the page number of the page copied over the root. * pgdbt: the page being copied on the root page. - * nrec: the tree's record count. 
- * rootent: last entry on the root page. - * rootlsn: the root page's original lsn. - */ -DEPRECATED rsplit1 54 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -DBT pgdbt DBT s -ARG nrec db_pgno_t lu -DBT rootent DBT s -POINTER rootlsn DB_LSN * lu -END - -/* - * BTREE-rsplit: used to log a reverse-split - * - * pgno: the page number of the page copied over the root. - * pgdbt: the page being copied on the root page. * root_pgno: the root page number. * nrec: the tree's record count. * rootent: last entry on the root page. * rootlsn: the root page's original lsn. */ BEGIN rsplit 63 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu -DBT pgdbt DBT s -ARG root_pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu +PGDBT pgdbt DBT s +WRLOCK root_pgno db_pgno_t lu ARG nrec db_pgno_t lu DBT rootent DBT s POINTER rootlsn DB_LSN * lu @@ -180,8 +92,8 @@ END * is_insert: 0 if a delete, 1 if an insert. */ BEGIN adj 55 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu POINTER lsn DB_LSN * lu ARG indx u_int32_t lu ARG indx_copy u_int32_t lu @@ -198,8 +110,8 @@ END * opflags: CAD_UPDATEROOT: if root page count was adjusted. */ BEGIN cadjust 56 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu POINTER lsn DB_LSN * lu ARG indx u_int32_t lu ARG adjust int32_t ld @@ -214,8 +126,8 @@ END * indx: the index to be deleted. */ BEGIN cdel 57 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu POINTER lsn DB_LSN * lu ARG indx u_int32_t lu END @@ -230,8 +142,8 @@ END * duplicate: the prefix of the replacement that matches the original. */ BEGIN repl 58 -ARG fileid int32_t ld -ARG pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK pgno db_pgno_t lu POINTER lsn DB_LSN * lu ARG indx u_int32_t lu ARG isdeleted u_int32_t lu @@ -245,9 +157,9 @@ END * BTREE-root: log the assignment of a root btree page. 
*/ BEGIN root 59 -ARG fileid int32_t ld -ARG meta_pgno db_pgno_t lu -ARG root_pgno db_pgno_t lu +DB fileid int32_t ld +WRLOCK meta_pgno db_pgno_t lu +WRLOCK root_pgno db_pgno_t lu POINTER meta_lsn DB_LSN * lu END @@ -260,7 +172,7 @@ END */ BEGIN curadj 64 /* Fileid of db affected. */ -ARG fileid int32_t ld +DB fileid int32_t ld /* Which adjustment. */ ARG mode db_ca_mode ld /* Page entry is from. */ @@ -284,7 +196,7 @@ END */ BEGIN rcuradj 65 /* Fileid of db affected. */ -ARG fileid int32_t ld +DB fileid int32_t ld /* Which adjustment. */ ARG mode ca_recno_arg ld /* Root page number. */ |