author     unknown <tim@threads.polyesthetic.msg>   2001-03-04 19:42:05 -0500
committer  unknown <tim@threads.polyesthetic.msg>   2001-03-04 19:42:05 -0500
commit     ec6ae091617bdfdca9e65e8d3e65b950d234f676 (patch)
tree       9dd732e08dba156ee3d7635caedc0dc3107ecac6 /bdb/include
parent     87d70fb598105b64b538ff6b81eef9da626255b1 (diff)
download   mariadb-git-ec6ae091617bdfdca9e65e8d3e65b950d234f676.tar.gz
Import changeset
Diffstat (limited to 'bdb/include')
57 files changed, 10833 insertions, 0 deletions
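Before the header contents themselves, a short usage sketch may help orient readers of this import: the DB handle declared in db.src below exposes its operations as function pointers (open, put, get, close), and the DBT structure carries key/data pairs. The snippet is illustrative only and is not part of the changeset; the file name, the function name, and the exact db_create() prototype are assumed from the standard DB 3.x API rather than taken from the diff, while the DB_CREATE, DB_BTREE and DB_NOTFOUND constants and the method signatures do appear in db.src.

/*
 * Illustrative sketch only -- not part of the imported changeset.
 * Exercises the access-method API declared in db.src: db_create(),
 * DB->open, DB->put, DB->get, DB->close, and the DBT structure.
 */
#include <string.h>
#include <stdio.h>
#include <db.h>		/* generated from db.src by the dist scripts */

int
store_and_fetch(void)	/* hypothetical helper name */
{
	DB *dbp;
	DBT key, data;
	int ret;

	/* Allocate a database handle outside any environment. */
	if ((ret = db_create(&dbp, NULL, 0)) != 0)
		return (ret);

	/* Create/open a btree database file (DB 3.x six-argument open). */
	if ((ret = dbp->open(dbp,
	    "example.db", NULL, DB_BTREE, DB_CREATE, 0664)) != 0) {
		(void)dbp->close(dbp, 0);
		return (ret);
	}

	/* DBTs must be zeroed before use; only data/size are set here. */
	memset(&key, 0, sizeof(key));
	memset(&data, 0, sizeof(data));
	key.data = "fruit";
	key.size = sizeof("fruit");
	data.data = "apple";
	data.size = sizeof("apple");

	/* Store the pair, then read it back immediately. */
	if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
		goto err;

	memset(&data, 0, sizeof(data));
	if ((ret = dbp->get(dbp, NULL, &key, &data, 0)) == 0)
		printf("%s -> %s\n", (char *)key.data, (char *)data.data);
	else if (ret == DB_NOTFOUND)
		printf("pair not found\n");

err:	(void)dbp->close(dbp, 0);
	return (ret);
}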
diff --git a/bdb/include/btree.h b/bdb/include/btree.h new file mode 100644 index 00000000000..395f645f03f --- /dev/null +++ b/bdb/include/btree.h @@ -0,0 +1,317 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: btree.h,v 11.37 2001/01/17 17:09:52 bostic Exp $ + */ + +/* Forward structure declarations. */ +struct __btree; typedef struct __btree BTREE; +struct __cursor; typedef struct __cursor BTREE_CURSOR; +struct __epg; typedef struct __epg EPG; +struct __recno; typedef struct __recno RECNO; + +#define DEFMINKEYPAGE (2) + +/* + * A recno order of 0 indicates that we don't have an order, not that we've + * an order less than 1. + */ +#define INVALID_ORDER 0 + +#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) +#define ISLEAF(p) (TYPE(p) == P_LBTREE || \ + TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP) + +/* Flags for __bam_cadjust_log(). */ +#define CAD_UPDATEROOT 0x01 /* Root page count was updated. */ + +/* Flags for __bam_split_log(). */ +#define SPL_NRECS 0x01 /* Split tree has record count. */ + +/* Flags for __bam_iitem(). */ +#define BI_DELETED 0x01 /* Key/data pair only placeholder. */ + +/* Flags for __bam_stkrel(). */ +#define STK_CLRDBC 0x01 /* Clear dbc->page reference. */ +#define STK_NOLOCK 0x02 /* Don't retain locks. */ + +/* Flags for __ram_ca(). These get logged, so make the values explicit. */ +typedef enum { + CA_DELETE = 0, /* Delete the current record. */ + CA_IAFTER = 1, /* Insert before the current record. */ + CA_IBEFORE = 2, /* Insert after the current record. */ + CA_ICURRENT = 3 /* Overwrite the current record. 
*/ +} ca_recno_arg; + +/* + * Flags for __bam_search() and __bam_rsearch(). + * + * Note, internal page searches must find the largest record less than key in + * the tree so that descents work. Leaf page searches must find the smallest + * record greater than key so that the returned index is the record's correct + * position for insertion. + * + * The flags parameter to the search routines describes three aspects of the + * search: the type of locking required (including if we're locking a pair of + * pages), the item to return in the presence of duplicates and whether or not + * to return deleted entries. To simplify both the mnemonic representation + * and the code that checks for various cases, we construct a set of bitmasks. + */ +#define S_READ 0x00001 /* Read locks. */ +#define S_WRITE 0x00002 /* Write locks. */ + +#define S_APPEND 0x00040 /* Append to the tree. */ +#define S_DELNO 0x00080 /* Don't return deleted items. */ +#define S_DUPFIRST 0x00100 /* Return first duplicate. */ +#define S_DUPLAST 0x00200 /* Return last duplicate. */ +#define S_EXACT 0x00400 /* Exact items only. */ +#define S_PARENT 0x00800 /* Lock page pair. */ +#define S_STACK 0x01000 /* Need a complete stack. */ +#define S_PAST_EOF 0x02000 /* If doing insert search (or keyfirst + * or keylast operations), or a split + * on behalf of an insert, it's okay to + * return an entry one past end-of-page. + */ +#define S_STK_ONLY 0x04000 /* Just return info in the stack */ + +#define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK) +#define S_FIND (S_READ | S_DUPFIRST | S_DELNO) +#define S_FIND_WR (S_WRITE | S_DUPFIRST | S_DELNO) +#define S_INSERT (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) +#define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK) +#define S_KEYLAST (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) +#define S_WRPAIR (S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT) + +/* + * Various routines pass around page references. A page reference is + * a pointer to the page, and the indx indicates an item on the page. + * Each page reference may include a lock. + */ +struct __epg { + PAGE *page; /* The page. */ + db_indx_t indx; /* The index on the page. */ + db_indx_t entries; /* The number of entries on page */ + DB_LOCK lock; /* The page's lock. */ + db_lockmode_t lock_mode; /* The lock mode. */ +}; + +/* + * We maintain a stack of the pages that we're locking in the tree. Grow + * the stack as necessary. + * + * XXX + * Temporary fix for #3243 -- clear the page and lock from the stack entry. + * The correct fix is to never release a stack that doesn't hold items. + */ +#define BT_STK_CLR(c) do { \ + (c)->csp = (c)->sp; \ + (c)->csp->page = NULL; \ + (c)->csp->lock.off = LOCK_INVALID; \ +} while (0) + +#define BT_STK_ENTER(dbenv, c, pagep, page_indx, l, mode, ret) do { \ + if ((ret = \ + (c)->csp == (c)->esp ? __bam_stkgrow(dbenv, c) : 0) == 0) { \ + (c)->csp->page = pagep; \ + (c)->csp->indx = page_indx; \ + (c)->csp->entries = NUM_ENT(pagep); \ + (c)->csp->lock = l; \ + (c)->csp->lock_mode = mode; \ + } \ +} while (0) + +#define BT_STK_PUSH(dbenv, c, pagep, page_indx, lock, mode, ret) do { \ + BT_STK_ENTER(dbenv, c, pagep, page_indx, lock, mode, ret); \ + ++(c)->csp; \ +} while (0) + +#define BT_STK_NUM(dbenv, c, pagep, page_indx, ret) do { \ + if ((ret = \ + (c)->csp == (c)->esp ? 
__bam_stkgrow(dbenv, c) : 0) == 0) { \ + (c)->csp->page = NULL; \ + (c)->csp->indx = page_indx; \ + (c)->csp->entries = NUM_ENT(pagep); \ + (c)->csp->lock.off = LOCK_INVALID; \ + (c)->csp->lock_mode = DB_LOCK_NG; \ + } \ +} while (0) + +#define BT_STK_NUMPUSH(dbenv, c, pagep, page_indx,ret) do { \ + BT_STK_NUM(dbenv, cp, pagep, page_indx, ret); \ + ++(c)->csp; \ +} while (0) + +#define BT_STK_POP(c) \ + ((c)->csp == (c)->sp ? NULL : --(c)->csp) + +/* Btree/Recno cursor. */ +struct __cursor { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* btree private part */ + EPG *sp; /* Stack pointer. */ + EPG *csp; /* Current stack entry. */ + EPG *esp; /* End stack pointer. */ + EPG stack[5]; + + db_indx_t ovflsize; /* Maximum key/data on-page size. */ + + db_recno_t recno; /* Current record number. */ + u_int32_t order; /* Relative order among deleted curs. */ + + /* + * Btree: + * We set a flag in the cursor structure if the underlying object has + * been deleted. It's not strictly necessary, we could get the same + * information by looking at the page itself, but this method doesn't + * require us to retrieve the page on cursor delete. + * + * Recno: + * When renumbering recno databases during deletes, cursors referencing + * "deleted" records end up positioned between two records, and so must + * be specially adjusted on the next operation. + */ +#define C_DELETED 0x0001 /* Record was deleted. */ + /* + * There are three tree types that require maintaining record numbers. + * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set, + * and Btree off-page duplicate trees. + */ +#define C_RECNUM 0x0002 /* Tree requires record counts. */ + /* + * Recno trees have immutable record numbers by default, but optionally + * support mutable record numbers. Off-page duplicate Recno trees have + * mutable record numbers. All Btrees with record numbers (including + * off-page duplicate trees) are mutable by design, no flag is needed. + */ +#define C_RENUMBER 0x0004 /* Tree records are mutable. */ + u_int32_t flags; +}; + +/* + * Threshhold value, as a function of bt_minkey, of the number of + * bytes a key/data pair can use before being placed on an overflow + * page. Assume every item requires the maximum alignment for + * padding, out of sheer paranoia. + */ +#define B_MINKEY_TO_OVFLSIZE(minkey, pgsize) \ + ((u_int16_t)(((pgsize) - P_OVERHEAD) / ((minkey) * P_INDX) - \ + (BKEYDATA_PSIZE(0) + ALIGN(1, sizeof(int32_t))))) + +/* + * The maximum space that a single item can ever take up on one page. + * Used by __bam_split to determine whether a split is still necessary. + */ +#define B_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define B_MAXSIZEONPAGE(ovflsize) \ + (B_MAX(BOVERFLOW_PSIZE, BKEYDATA_PSIZE(ovflsize))) + +/* + * The in-memory, per-tree btree/recno data structure. + */ +struct __btree { /* Btree access method. */ + /* + * !!! + * These fields are write-once (when the structure is created) and + * so are ignored as far as multi-threading is concerned. + */ + db_pgno_t bt_meta; /* Database meta-data page. */ + db_pgno_t bt_root; /* Database root page. */ + + u_int32_t bt_maxkey; /* Maximum keys per page. */ + u_int32_t bt_minkey; /* Minimum keys per page. */ + + /* Btree comparison function. */ + int (*bt_compare) __P((DB *, const DBT *, const DBT *)); + /* Btree prefix function. */ + size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *)); + + /* Recno access method. */ + int re_pad; /* Fixed-length padding byte. */ + int re_delim; /* Variable-length delimiting byte. 
*/ + u_int32_t re_len; /* Length for fixed-length records. */ + char *re_source; /* Source file name. */ + + /* + * !!! + * The bt_lpgno field is NOT protected by any mutex, and for this + * reason must be advisory only, so, while it is read/written by + * multiple threads, DB is completely indifferent to the quality + * of its information. + */ + db_pgno_t bt_lpgno; /* Last insert location. */ + + /* + * !!! + * The re_modified field is NOT protected by any mutex, and for this + * reason cannot be anything more complicated than a zero/non-zero + * value. The actual writing of the backing source file cannot be + * threaded, so clearing the flag isn't a problem. + */ + int re_modified; /* If the tree was modified. */ + + /* + * !!! + * These fields are ignored as far as multi-threading is concerned. + * There are no transaction semantics associated with backing files, + * nor is there any thread protection. + */ + FILE *re_fp; /* Source file handle. */ + int re_eof; /* Backing source file EOF reached. */ + db_recno_t re_last; /* Last record number read. */ +}; + +/* + * Modes for the __bam_curadj recovery records (btree_curadj). + * These appear in log records, so we wire the values and + * do not leave it up to the compiler. + */ +typedef enum { + DB_CA_DI = 1, + DB_CA_DUP = 2, + DB_CA_RSPLIT = 3, + DB_CA_SPLIT = 4 +} db_ca_mode; + +#include "btree_auto.h" +#include "btree_ext.h" +#include "db_am.h" diff --git a/bdb/include/btree_auto.h b/bdb/include/btree_auto.h new file mode 100644 index 00000000000..214f84332cf --- /dev/null +++ b/bdb/include/btree_auto.h @@ -0,0 +1,267 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef bam_AUTO_H +#define bam_AUTO_H + +#define DB_bam_pg_alloc 51 +typedef struct _bam_pg_alloc_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + DB_LSN page_lsn; + db_pgno_t pgno; + u_int32_t ptype; + db_pgno_t next; +} __bam_pg_alloc_args; + +int __bam_pg_alloc_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *, DB_LSN *, db_pgno_t, u_int32_t, db_pgno_t)); +int __bam_pg_alloc_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_pg_alloc_read __P((DB_ENV *, void *, __bam_pg_alloc_args **)); + +#define DB_bam_pg_alloc1 60 +typedef struct _bam_pg_alloc1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + DB_LSN alloc_lsn; + DB_LSN page_lsn; + db_pgno_t pgno; + u_int32_t ptype; + db_pgno_t next; +} __bam_pg_alloc1_args; + +int __bam_pg_alloc1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_pg_alloc1_read __P((DB_ENV *, void *, __bam_pg_alloc1_args **)); + +#define DB_bam_pg_free 52 +typedef struct _bam_pg_free_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + DBT header; + db_pgno_t next; +} __bam_pg_free_args; + +int __bam_pg_free_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, const DBT *, db_pgno_t)); +int __bam_pg_free_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_pg_free_read __P((DB_ENV *, void *, __bam_pg_free_args **)); + +#define DB_bam_pg_free1 61 +typedef struct _bam_pg_free1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN meta_lsn; + DB_LSN alloc_lsn; + DBT header; + db_pgno_t next; +} __bam_pg_free1_args; + +int __bam_pg_free1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_pg_free1_read 
__P((DB_ENV *, void *, __bam_pg_free1_args **)); + +#define DB_bam_split1 53 +typedef struct _bam_split1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t left; + DB_LSN llsn; + db_pgno_t right; + DB_LSN rlsn; + u_int32_t indx; + db_pgno_t npgno; + DB_LSN nlsn; + DBT pg; +} __bam_split1_args; + +int __bam_split1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split1_read __P((DB_ENV *, void *, __bam_split1_args **)); + +#define DB_bam_split 62 +typedef struct _bam_split_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t left; + DB_LSN llsn; + db_pgno_t right; + DB_LSN rlsn; + u_int32_t indx; + db_pgno_t npgno; + DB_LSN nlsn; + db_pgno_t root_pgno; + DBT pg; + u_int32_t opflags; +} __bam_split_args; + +int __bam_split_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *, u_int32_t)); +int __bam_split_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_read __P((DB_ENV *, void *, __bam_split_args **)); + +#define DB_bam_rsplit1 54 +typedef struct _bam_rsplit1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT pgdbt; + db_pgno_t nrec; + DBT rootent; + DB_LSN rootlsn; +} __bam_rsplit1_args; + +int __bam_rsplit1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit1_read __P((DB_ENV *, void *, __bam_rsplit1_args **)); + +#define DB_bam_rsplit 63 +typedef struct _bam_rsplit_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT pgdbt; + db_pgno_t root_pgno; + db_pgno_t nrec; + DBT rootent; + DB_LSN rootlsn; +} __bam_rsplit_args; + +int __bam_rsplit_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, const DBT *, db_pgno_t, db_pgno_t, const DBT *, DB_LSN *)); +int __bam_rsplit_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit_read __P((DB_ENV *, void *, __bam_rsplit_args **)); + +#define DB_bam_adj 55 +typedef struct _bam_adj_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + u_int32_t indx_copy; + u_int32_t is_insert; +} __bam_adj_args; + +int __bam_adj_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, u_int32_t, u_int32_t, u_int32_t)); +int __bam_adj_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_adj_read __P((DB_ENV *, void *, __bam_adj_args **)); + +#define DB_bam_cadjust 56 +typedef struct _bam_cadjust_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + int32_t adjust; + u_int32_t opflags; +} __bam_cadjust_args; + +int __bam_cadjust_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, u_int32_t, int32_t, u_int32_t)); +int __bam_cadjust_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cadjust_read __P((DB_ENV *, void *, __bam_cadjust_args **)); + +#define DB_bam_cdel 57 +typedef struct _bam_cdel_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; +} __bam_cdel_args; + +int __bam_cdel_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, u_int32_t)); +int __bam_cdel_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cdel_read 
__P((DB_ENV *, void *, __bam_cdel_args **)); + +#define DB_bam_repl 58 +typedef struct _bam_repl_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + u_int32_t isdeleted; + DBT orig; + DBT repl; + u_int32_t prefix; + u_int32_t suffix; +} __bam_repl_args; + +int __bam_repl_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, u_int32_t, u_int32_t, const DBT *, const DBT *, u_int32_t, u_int32_t)); +int __bam_repl_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_repl_read __P((DB_ENV *, void *, __bam_repl_args **)); + +#define DB_bam_root 59 +typedef struct _bam_root_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t meta_pgno; + db_pgno_t root_pgno; + DB_LSN meta_lsn; +} __bam_root_args; + +int __bam_root_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, db_pgno_t, DB_LSN *)); +int __bam_root_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_root_read __P((DB_ENV *, void *, __bam_root_args **)); + +#define DB_bam_curadj 64 +typedef struct _bam_curadj_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_ca_mode mode; + db_pgno_t from_pgno; + db_pgno_t to_pgno; + db_pgno_t left_pgno; + u_int32_t first_indx; + u_int32_t from_indx; + u_int32_t to_indx; +} __bam_curadj_args; + +int __bam_curadj_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_ca_mode, db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, u_int32_t, u_int32_t)); +int __bam_curadj_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_curadj_read __P((DB_ENV *, void *, __bam_curadj_args **)); + +#define DB_bam_rcuradj 65 +typedef struct _bam_rcuradj_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + ca_recno_arg mode; + db_pgno_t root; + db_recno_t recno; + u_int32_t order; +} __bam_rcuradj_args; + +int __bam_rcuradj_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, ca_recno_arg, db_pgno_t, db_recno_t, u_int32_t)); +int __bam_rcuradj_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rcuradj_read __P((DB_ENV *, void *, __bam_rcuradj_args **)); +int __bam_init_print __P((DB_ENV *)); +int __bam_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/btree_ext.h b/bdb/include/btree_ext.h new file mode 100644 index 00000000000..8a9866e0b5a --- /dev/null +++ b/bdb/include/btree_ext.h @@ -0,0 +1,122 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _btree_ext_h_ +#define _btree_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __bam_cmp __P((DB *, const DBT *, PAGE *, + u_int32_t, int (*)(DB *, const DBT *, const DBT *), int *)); +int __bam_defcmp __P((DB *, const DBT *, const DBT *)); +size_t __bam_defpfx __P((DB *, const DBT *, const DBT *)); +int __bam_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __bam_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __bam_mswap __P((PAGE *)); +void __bam_cprint __P((DBC *)); +int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int)); +int __ram_ca_delete __P((DB *, db_pgno_t)); +int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int)); +int __bam_ca_dup __P((DBC *, + u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); +int __bam_ca_undodup __P((DB *, + u_int32_t, db_pgno_t, u_int32_t, u_int32_t)); +int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t)); +int __bam_ca_split __P((DBC *, + db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int)); +void __bam_ca_undosplit __P((DB *, + db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t)); +int __bam_c_init __P((DBC *, DBTYPE)); +int __bam_c_refresh __P((DBC *)); +int __bam_c_count __P((DBC *, db_recno_t *)); +int __bam_c_dup __P((DBC *, DBC *)); +int __bam_c_rget __P((DBC *, DBT *, u_int32_t)); +int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __bam_ditem __P((DBC *, PAGE *, u_int32_t)); +int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int)); +int __bam_dpages __P((DBC *, EPG *)); +int __bam_db_create __P((DB *)); +int __bam_db_close __P((DB *)); +int __bam_set_flags __P((DB *, u_int32_t *flagsp)); +int __ram_set_flags __P((DB *, u_int32_t *flagsp)); +int __bam_open __P((DB *, const char *, db_pgno_t, u_int32_t)); +int __bam_metachk __P((DB *, const char *, BTMETA *)); +int __bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t)); +int __bam_iitem __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t)); +u_int32_t __bam_partsize __P((u_int32_t, DBT *, PAGE *, u_int32_t)); +int __bam_build __P((DBC *, u_int32_t, + DBT *, PAGE *, u_int32_t, u_int32_t)); +int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *)); +int __bam_pg_alloc_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_pg_free_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_split_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rsplit_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_adj_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cadjust_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_cdel_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_repl_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_root_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_curadj_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_rcuradj_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __bam_reclaim __P((DB *, DB_TXN *)); +int __ram_open __P((DB *, const char *, db_pgno_t, u_int32_t)); +int __ram_c_del __P((DBC *)); +int __ram_c_get + __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +int __ram_ca __P((DBC *, ca_recno_arg)); +int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int)); +int __ram_writeback __P((DB *)); +int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); +int __bam_adjust __P((DBC *, int32_t)); +int 
__bam_nrecs __P((DBC *, db_recno_t *)); +db_recno_t __bam_total __P((PAGE *)); +int __bam_search __P((DBC *, + const DBT *, u_int32_t, int, db_recno_t *, int *)); +int __bam_stkrel __P((DBC *, u_int32_t)); +int __bam_stkgrow __P((DB_ENV *, BTREE_CURSOR *)); +int __bam_split __P((DBC *, void *)); +int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); +int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); +int __bam_traverse __P((DBC *, db_lockmode_t, + db_pgno_t, int (*)(DB *, PAGE *, void *, int *), void *)); +int __bam_stat_callback __P((DB *, PAGE *, void *, int *)); +int __bam_key_range __P((DB *, + DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __bam_30_btreemeta __P((DB *, char *, u_int8_t *)); +int __bam_31_btreemeta + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __bam_31_lbtree + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, + db_pgno_t, u_int32_t)); +int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + u_int32_t)); +int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + u_int32_t)); +int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *, + db_pgno_t, u_int32_t, int, int, u_int32_t)); +int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, + u_int32_t)); +int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, + void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); +int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, + PAGE *, void *, int (*)(void *, const void *), DBT *, + u_int32_t)); +int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, + DBT *, void *, int (*)(void *, const void *), u_int32_t)); +int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, + u_int32_t, DB *)); +#if defined(__cplusplus) +} +#endif +#endif /* _btree_ext_h_ */ diff --git a/bdb/include/clib_ext.h b/bdb/include/clib_ext.h new file mode 100644 index 00000000000..efd0796afe3 --- /dev/null +++ b/bdb/include/clib_ext.h @@ -0,0 +1,38 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _clib_ext_h_ +#define _clib_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +#ifndef HAVE_GETCWD +char *getcwd __P((char *, size_t)); +#endif +#ifndef HAVE_GETOPT +int getopt __P((int, char * const *, const char *)); +#endif +#ifndef HAVE_MEMCMP +int memcmp __P((const void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMCPY +void *memcpy __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMMOVE +void *memmove __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_RAISE +int raise __P((int)); +#endif +#ifndef HAVE_SNPRINTF +int snprintf __P((char *, size_t, const char *, ...)); +#endif +int strcasecmp __P((const char *, const char *)); +#ifndef HAVE_STRERROR +char *strerror __P((int)); +#endif +#ifndef HAVE_VSNPRINTF +int vsnprintf(); +#endif +#if defined(__cplusplus) +} +#endif +#endif /* _clib_ext_h_ */ diff --git a/bdb/include/common_ext.h b/bdb/include/common_ext.h new file mode 100644 index 00000000000..a36d62cac4a --- /dev/null +++ b/bdb/include/common_ext.h @@ -0,0 +1,44 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _common_ext_h_ +#define _common_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __db_byteorder __P((DB_ENV *, int)); +int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t)); +int __db_fcchk + __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t)); +int __db_ferr __P((const DB_ENV *, const char *, int)); +int __db_pgerr __P((DB *, db_pgno_t)); +int __db_pgfmt __P((DB *, db_pgno_t)); +int __db_eopnotsup __P((const DB_ENV *)); +#ifdef DIAGNOSTIC +void __db_assert __P((const char *, const char *, int)); +#endif +int __db_panic_msg __P((DB_ENV *)); +int __db_panic __P((DB_ENV *, int)); +void __db_err __P((const DB_ENV *, const char *, ...)); +void __db_real_err + __P((const DB_ENV *, int, int, int, const char *, va_list)); +void __db_logmsg __P((const DB_ENV *, + DB_TXN *, const char *, u_int32_t, const char *, ...)); +void __db_real_log __P((const DB_ENV *, + DB_TXN *, const char *, u_int32_t, const char *, va_list ap)); +int __db_unknown_flag __P((DB_ENV *, char *, u_int32_t)); +int __db_unknown_type __P((DB_ENV *, char *, u_int32_t)); +#ifdef DIAGNOSTIC +int __db_missing_txn_err __P((DB_ENV *)); +#endif +int __db_getlong + __P((DB *, const char *, char *, long, long, long *)); +int __db_getulong + __P((DB *, const char *, char *, u_long, u_long, u_long *)); +u_int32_t __db_log2 __P((u_int32_t)); +int __db_util_logset __P((const char *, char *)); +void __db_util_siginit __P((void)); +int __db_util_interrupted __P((void)); +void __db_util_sigresend __P((void)); +#if defined(__cplusplus) +} +#endif +#endif /* _common_ext_h_ */ diff --git a/bdb/include/crdel_auto.h b/bdb/include/crdel_auto.h new file mode 100644 index 00000000000..409c256811f --- /dev/null +++ b/bdb/include/crdel_auto.h @@ -0,0 +1,88 @@ +/* Do not edit: automatically built by gen_rec.awk. 
*/ + +#ifndef crdel_AUTO_H +#define crdel_AUTO_H + +#define DB_crdel_fileopen 141 +typedef struct _crdel_fileopen_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT name; + u_int32_t mode; +} __crdel_fileopen_args; + +int __crdel_fileopen_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, const DBT *, u_int32_t)); +int __crdel_fileopen_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_fileopen_read __P((DB_ENV *, void *, __crdel_fileopen_args **)); + +#define DB_crdel_metasub 142 +typedef struct _crdel_metasub_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DBT page; + DB_LSN lsn; +} __crdel_metasub_args; + +int __crdel_metasub_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, const DBT *, DB_LSN *)); +int __crdel_metasub_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_metasub_read __P((DB_ENV *, void *, __crdel_metasub_args **)); + +#define DB_crdel_metapage 143 +typedef struct _crdel_metapage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DBT name; + db_pgno_t pgno; + DBT page; +} __crdel_metapage_args; + +int __crdel_metapage_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, const DBT *, db_pgno_t, const DBT *)); +int __crdel_metapage_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_metapage_read __P((DB_ENV *, void *, __crdel_metapage_args **)); + +#define DB_crdel_old_delete 144 +typedef struct _crdel_old_delete_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT name; +} __crdel_old_delete_args; + +int __crdel_old_delete_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_old_delete_read __P((DB_ENV *, void *, __crdel_old_delete_args **)); + +#define DB_crdel_rename 145 +typedef struct _crdel_rename_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DBT name; + DBT newname; +} __crdel_rename_args; + +int __crdel_rename_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, const DBT *, const DBT *)); +int __crdel_rename_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_rename_read __P((DB_ENV *, void *, __crdel_rename_args **)); + +#define DB_crdel_delete 146 +typedef struct _crdel_delete_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DBT name; +} __crdel_delete_args; + +int __crdel_delete_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, const DBT *)); +int __crdel_delete_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_delete_read __P((DB_ENV *, void *, __crdel_delete_args **)); +int __crdel_init_print __P((DB_ENV *)); +int __crdel_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/cxx_int.h b/bdb/include/cxx_int.h new file mode 100644 index 00000000000..4a9a40ceba1 --- /dev/null +++ b/bdb/include/cxx_int.h @@ -0,0 +1,96 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: cxx_int.h,v 11.13 2000/11/21 22:56:36 dda Exp $ + */ + +#ifndef _CXX_INT_H_ +#define _CXX_INT_H_ + +// private data structures known to the implementation only + +// +// Using FooImp classes will allow the implementation to change in the +// future without any modification to user code or even to header files +// that the user includes. 
FooImp * is just like void * except that it +// provides a little extra protection, since you cannot randomly assign +// any old pointer to a FooImp* as you can with void *. Currently, a +// pointer to such an opaque class is always just a pointer to the +// appropriate underlying implementation struct. These are converted +// back and forth using the various overloaded wrap()/unwrap() methods. +// This is essentially a use of the "Bridge" Design Pattern. +// +// WRAPPED_CLASS implements the appropriate wrap() and unwrap() methods +// for a wrapper class that has an underlying pointer representation. +// +#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE) \ + \ + class _IMP_CLASS {}; \ + \ + inline _WRAPPED_TYPE unwrap(_WRAPPER_CLASS *val) \ + { \ + if (!val) return (0); \ + return ((_WRAPPED_TYPE)((void *)(val->imp()))); \ + } \ + \ + inline const _WRAPPED_TYPE unwrapConst(const _WRAPPER_CLASS *val) \ + { \ + if (!val) return (0); \ + return ((const _WRAPPED_TYPE)((void *)(val->constimp()))); \ + } \ + \ + inline _IMP_CLASS *wrap(_WRAPPED_TYPE val) \ + { \ + return ((_IMP_CLASS*)((void *)val)); \ + } + +WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE*) +WRAPPED_CLASS(Db, DbImp, DB*) +WRAPPED_CLASS(DbEnv, DbEnvImp, DB_ENV*) +WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN*) + +// A tristate integer value used by the DB_ERROR macro below. +// We chose not to make this an enumerated type so it can +// be kept private, even though methods that return the +// tristate int can be declared in db_cxx.h . +// +#define ON_ERROR_THROW 1 +#define ON_ERROR_RETURN 0 +#define ON_ERROR_UNKNOWN (-1) + +// Macros that handle detected errors, in case we want to +// change the default behavior. The 'policy' is one of +// the tristate values given above. If UNKNOWN is specified, +// the behavior is taken from the last initialized DbEnv. +// +#define DB_ERROR(caller, ecode, policy) \ + DbEnv::runtime_error(caller, ecode, policy) + +// These defines are for tedious field set/get access methods. +// + +#define DB_RO_ACCESS(_class, _type, _cxx_name, _field) \ + \ +_type _class::get_##_cxx_name() const \ +{ \ + return (_field); \ +} + +#define DB_WO_ACCESS(_class, _type, _cxx_name, _field) \ + \ +void _class::set_##_cxx_name(_type value) \ +{ \ + _field = value; \ +} \ + +#define DB_RW_ACCESS(_class, _type, _cxx_name, _field) \ + DB_RO_ACCESS(_class, _type, _cxx_name, _field) \ + DB_WO_ACCESS(_class, _type, _cxx_name, _field) + +/* values for Db::flags_ */ +#define DB_CXX_PRIVATE_ENV 0x00000001 + +#endif /* !_CXX_INT_H_ */ diff --git a/bdb/include/db.src b/bdb/include/db.src new file mode 100644 index 00000000000..6dc0071efae --- /dev/null +++ b/bdb/include/db.src @@ -0,0 +1,1375 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db.src,v 11.121 2001/01/10 15:43:08 sue Exp $ + */ + +#ifndef _DB_H_ +#define _DB_H_ + +#ifndef __NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdio.h> +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * XXX + * Handle function prototypes and the keyword "const". This steps on name + * space that DB doesn't control, but all of the other solutions are worse. + * + * XXX + * While Microsoft's compiler is ANSI C compliant, it doesn't have _STDC_ + * defined by default, you specify a command line flag or #pragma to turn + * it on. Don't do that, however, because some of Microsoft's own header + * files won't compile. 
+ */ +#undef __P +#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER) +#define __P(protos) protos /* ANSI C prototypes */ +#else +#define const +#define __P(protos) () /* K&R C preprocessor */ +#endif + +/* + * !!! + * DB needs basic information about specifically sized types. If they're + * not provided by the system, typedef them here. + * + * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__, + * as does BIND and Kerberos, since we don't know for sure what #include + * files the user is using. + * + * !!! + * We also provide the standard u_int, u_long etc., if they're not provided + * by the system. + */ +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ +@u_int8_decl@ +@int16_decl@ +@u_int16_decl@ +@int32_decl@ +@u_int32_decl@ +#endif + +@u_char_decl@ +@u_short_decl@ +@u_int_decl@ +@u_long_decl@ +@ssize_t_decl@ + +#define DB_VERSION_MAJOR @DB_VERSION_MAJOR@ +#define DB_VERSION_MINOR @DB_VERSION_MINOR@ +#define DB_VERSION_PATCH @DB_VERSION_PATCH@ +#define DB_VERSION_STRING @DB_VERSION_STRING@ + +typedef u_int32_t db_pgno_t; /* Page number type. */ +typedef u_int16_t db_indx_t; /* Page offset type. */ +#define DB_MAX_PAGES 0xffffffff /* >= # of pages in a file */ + +typedef u_int32_t db_recno_t; /* Record number type. */ +#define DB_MAX_RECORDS 0xffffffff /* >= # of records in a tree */ + +/* Forward structure declarations, so applications get type checking. */ +struct __db; typedef struct __db DB; +#ifdef DB_DBM_HSEARCH + typedef struct __db DBM; +#endif +struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT; +struct __db_dbt; typedef struct __db_dbt DBT; +struct __db_env; typedef struct __db_env DB_ENV; +struct __db_h_stat; typedef struct __db_h_stat DB_HASH_STAT; +struct __db_ilock; typedef struct __db_ilock DB_LOCK_ILOCK; +struct __db_lock_stat; typedef struct __db_lock_stat DB_LOCK_STAT; +struct __db_lock_u; typedef struct __db_lock_u DB_LOCK; +struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; +struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; +struct __db_lsn; typedef struct __db_lsn DB_LSN; +struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO; +struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; +struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT; +struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE; +struct __db_qam_stat; typedef struct __db_qam_stat DB_QUEUE_STAT; +struct __db_txn; typedef struct __db_txn DB_TXN; +struct __db_txn_active; typedef struct __db_txn_active DB_TXN_ACTIVE; +struct __db_txn_stat; typedef struct __db_txn_stat DB_TXN_STAT; +struct __dbc; typedef struct __dbc DBC; +struct __dbc_internal; typedef struct __dbc_internal DBC_INTERNAL; +struct __fh_t; typedef struct __fh_t DB_FH; +struct __key_range; typedef struct __key_range DB_KEY_RANGE; + +/* Key/data structure -- a Data-Base Thang. */ +struct __db_dbt { + /* + * data/size must be fields 1 and 2 for DB 1.85 compatibility. + */ + void *data; /* Key/data */ + u_int32_t size; /* key/data length */ + + u_int32_t ulen; /* RO: length of user buffer. */ + u_int32_t dlen; /* RO: get/put record length. */ + u_int32_t doff; /* RO: get/put record offset. */ + +#define DB_DBT_ISSET 0x001 /* Lower level calls set value. */ +#define DB_DBT_MALLOC 0x002 /* Return in malloc'd memory. */ +#define DB_DBT_PARTIAL 0x004 /* Partial put/get. */ +#define DB_DBT_REALLOC 0x008 /* Return in realloc'd memory. */ +#define DB_DBT_USERMEM 0x010 /* Return in user's memory. 
*/ +#define DB_DBT_DUPOK 0x020 /* Insert if duplicate. */ + u_int32_t flags; +}; + +/* + * Common flags -- + * Interfaces which use any of these common flags should never have + * interface specific flags in this range. + */ +#define DB_CREATE 0x000001 /* Create file as necessary. */ +#define DB_CXX_NO_EXCEPTIONS 0x000002 /* C++: return error values. */ +#define DB_FORCE 0x000004 /* Force (anything). */ +#define DB_NOMMAP 0x000008 /* Don't mmap underlying file. */ +#define DB_RDONLY 0x000010 /* Read-only (O_RDONLY). */ +#define DB_RECOVER 0x000020 /* Run normal recovery. */ +#define DB_THREAD 0x000040 /* Applications are threaded. */ +#define DB_TXN_NOSYNC 0x000080 /* Do not sync log on commit. */ +#define DB_USE_ENVIRON 0x000100 /* Use the environment. */ +#define DB_USE_ENVIRON_ROOT 0x000200 /* Use the environment if root. */ + +/* + * Flags private to db_env_create. + */ +#define DB_CLIENT 0x000400 /* Open for a client environment. */ + +/* + * Flags private to db_create. + */ +#define DB_XA_CREATE 0x000400 /* Open in an XA environment. */ + +/* + * Flags private to DBENV->open. + */ +#define DB_INIT_CDB 0x000400 /* Concurrent Access Methods. */ +#define DB_INIT_LOCK 0x000800 /* Initialize locking. */ +#define DB_INIT_LOG 0x001000 /* Initialize logging. */ +#define DB_INIT_MPOOL 0x002000 /* Initialize mpool. */ +#define DB_INIT_TXN 0x004000 /* Initialize transactions. */ +#define DB_JOINENV 0x008000 /* Initialize all subsystems present. */ +#define DB_LOCKDOWN 0x010000 /* Lock memory into physical core. */ +#define DB_PRIVATE 0x020000 /* DB_ENV is process local. */ +#define DB_RECOVER_FATAL 0x040000 /* Run catastrophic recovery. */ +#define DB_SYSTEM_MEM 0x080000 /* Use system-backed memory. */ + +/* + * Flags private to DB->open. + */ +#define DB_EXCL 0x000400 /* Exclusive open (O_EXCL). */ +#define DB_FCNTL_LOCKING 0x000800 /* UNDOC: fcntl(2) locking. */ +#define DB_ODDFILESIZE 0x001000 /* UNDOC: truncate to N * pgsize. */ +#define DB_RDWRMASTER 0x002000 /* UNDOC: allow subdb master open R/W */ +#define DB_TRUNCATE 0x004000 /* Discard existing DB (O_TRUNC). */ +#define DB_EXTENT 0x008000 /* UNDOC: dealing with an extent. */ + +/* + * Flags private to DBENV->txn_begin. + */ +#define DB_TXN_NOWAIT 0x000400 /* Do not wait for locks in this TXN. */ +#define DB_TXN_SYNC 0x000800 /* Always sync log on commit. */ + +/* + * Flags private to DBENV->set_flags. + */ +#define DB_CDB_ALLDB 0x000400 /* In CDB, lock across environment. */ + +/* + * Flags private to DB->set_feedback's callback. + */ +#define DB_UPGRADE 0x000400 /* Upgrading. */ +#define DB_VERIFY 0x000800 /* Verifying. */ + +/* + * Flags private to DB->set_flags. + * + * DB->set_flags does not share common flags and so values start at 0x01. + */ +#define DB_DUP 0x0001 /* Btree, Hash: duplicate keys. */ +#define DB_DUPSORT 0x0002 /* Btree, Hash: duplicate keys. */ +#define DB_RECNUM 0x0004 /* Btree: record numbers. */ +#define DB_RENUMBER 0x0008 /* Recno: renumber on insert/delete. */ +#define DB_REVSPLITOFF 0x0010 /* Btree: turn off reverse splits. */ +#define DB_SNAPSHOT 0x0020 /* Recno: snapshot the input. */ + +/* + * Flags private to DB->join. + * + * DB->join does not share common flags and so values start at 0x01. + */ +#define DB_JOIN_NOSORT 0x0001 /* Don't try to optimize join. */ + +/* + * Flags private to DB->verify. + * + * DB->verify does not share common flags and so values start at 0x01. 
+ */ +#define DB_AGGRESSIVE 0x0001 /* Salvage anything which might be data.*/ +#define DB_NOORDERCHK 0x0002 /* Skip order check; subdb w/ user func */ +#define DB_ORDERCHKONLY 0x0004 /* Only perform an order check on subdb */ +#define DB_PR_PAGE 0x0008 /* Show page contents (-da). */ +#define DB_PR_HEADERS 0x0010 /* Show only page headers (-dh). */ +#define DB_PR_RECOVERYTEST 0x0020 /* Recovery test (-dr). */ +#define DB_SALVAGE 0x0040 /* Salvage what looks like data. */ +/* + * !!! + * These must not go over 0x8000, or they will collide with the flags + * used by __bam_vrfy_subtree. + */ +#define DB_VRFY_FLAGMASK 0xffff /* For masking above flags. */ + +/* + * Deadlock detector modes; used in the DBENV structure to configure the + * locking subsystem. + */ +#define DB_LOCK_NORUN 0 +#define DB_LOCK_DEFAULT 1 /* Default policy. */ +#define DB_LOCK_OLDEST 2 /* Abort oldest transaction. */ +#define DB_LOCK_RANDOM 3 /* Abort random transaction. */ +#define DB_LOCK_YOUNGEST 4 /* Abort youngest transaction. */ + +/******************************************************* + * Environment. + *******************************************************/ +#define DB_REGION_MAGIC 0x120897 /* Environment magic number. */ + +typedef enum { + DB_TXN_ABORT, + DB_TXN_BACKWARD_ROLL, + DB_TXN_FORWARD_ROLL, + DB_TXN_OPENFILES +} db_recops; + +#define DB_UNDO(op) ((op) == DB_TXN_ABORT || (op) == DB_TXN_BACKWARD_ROLL) +#define DB_REDO(op) ((op) == DB_TXN_FORWARD_ROLL) + +struct __db_env { + /******************************************************* + * Public: owned by the application. + *******************************************************/ + FILE *db_errfile; /* Error message file stream. */ + const char *db_errpfx; /* Error message prefix. */ + /* Callbacks. */ + void (*db_errcall) __P((const char *, char *)); + void (*db_feedback) __P((DB_ENV *, int, int)); + void (*db_paniccall) __P((DB_ENV *, int)); + int (*db_recovery_init) __P((DB_ENV *)); + + /* + * Currently, the verbose list is a bit field with room for 32 + * entries. There's no reason that it needs to be limited, if + * there are ever more than 32 entries, convert to a bit array. + */ +#define DB_VERB_CHKPOINT 0x0001 /* List checkpoints. */ +#define DB_VERB_DEADLOCK 0x0002 /* Deadlock detection information. */ +#define DB_VERB_RECOVERY 0x0004 /* Recovery information. */ +#define DB_VERB_WAITSFOR 0x0008 /* Dump waits-for table. */ + u_int32_t verbose; /* Verbose output. */ + + void *app_private; /* Application-private handle. */ + + /* Locking. */ + u_int8_t *lk_conflicts; /* Two dimensional conflict matrix. */ + u_int32_t lk_modes; /* Number of lock modes in table. */ + u_int32_t lk_max; /* Maximum number of locks. */ + u_int32_t lk_max_lockers;/* Maximum number of lockers. */ + u_int32_t lk_max_objects;/* Maximum number of locked objects. */ + u_int32_t lk_detect; /* Deadlock detect on all conflicts. */ + + /* Logging. */ + u_int32_t lg_bsize; /* Buffer size. */ + u_int32_t lg_max; /* Maximum file size. */ + + /* Memory pool. */ + u_int32_t mp_gbytes; /* Cachesize: GB. */ + u_int32_t mp_bytes; /* Cachesize: Bytes. */ + size_t mp_size; /* DEPRECATED: Cachesize: bytes. */ + int mp_ncache; /* Number of cache regions. */ + size_t mp_mmapsize; /* Maximum file size for mmap. */ + + /* Transactions. */ + u_int32_t tx_max; /* Maximum number of transactions. */ + time_t tx_timestamp; /* Recover to specific timestamp. */ + int (*tx_recover) /* Dispatch function for recovery. 
*/ + __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + + /******************************************************* + * Private: owned by DB. + *******************************************************/ + int db_panic; /* Panic causing errno. */ + + /* User files, paths. */ + char *db_home; /* Database home. */ + char *db_log_dir; /* Database log file directory. */ + char *db_tmp_dir; /* Database tmp file directory. */ + + char **db_data_dir; /* Database data file directories. */ + int data_cnt; /* Database data file slots. */ + int data_next; /* Next Database data file slot. */ + + int db_mode; /* Default open permissions. */ + + void *reginfo; /* REGINFO structure reference. */ + DB_FH *lockfhp; /* fcntl(2) locking file handle. */ + long shm_key; /* shmget(2) key. */ + + void *lg_handle; /* Log handle. */ + + void *lk_handle; /* Lock handle. */ + + void *mp_handle; /* Mpool handle. */ + + void *tx_handle; /* Txn handle. */ + + int (**dtab) /* Dispatch table */ + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t dtab_size; /* Slots in the dispatch table. */ + + void *cl_handle; /* RPC: remote client handle. */ + long cl_id; /* RPC: Remote client env id. */ + + int dblocal_ref; /* DB_ENV_DBLOCAL: reference count. */ + u_int32_t db_mutexlocks; /* db_set_mutexlocks */ + + /* + * List of open DB handles for this DB_ENV, used for cursor + * adjustment. Must be protected for multi-threaded support. + * + * !!! + * As this structure is allocated in per-process memory, the + * mutex may need to be stored elsewhere on architectures unable + * to support mutexes in heap memory, e.g. HP/UX 9. + */ + void *dblist_mutexp; /* Mutex. */ + /* + * !!! + * Explicit representation of structure in queue.h. + * LIST_HEAD(dblist, __db); + */ + struct { + struct __db *lh_first; + } dblist; + + /* + * XA support. + * + * !!! + * Explicit representations of structures in queue.h. + * + * TAILQ_ENTRY(__db_env); + */ + struct { + struct __db_env *tqe_next; + struct __db_env **tqe_prev; + } links; + int xa_rmid; /* XA Resource Manager ID. */ + DB_TXN *xa_txn; /* XA Current transaction. */ + + void *cj_internal; /* C++/Java private. */ + + /* Methods. 
*/ + int (*close) __P((DB_ENV *, u_int32_t)); + void (*err) __P((const DB_ENV *, int, const char *, ...)); + void (*errx) __P((const DB_ENV *, const char *, ...)); + int (*open) __P((DB_ENV *, const char *, u_int32_t, int)); + int (*remove) __P((DB_ENV *, const char *, u_int32_t)); + int (*set_data_dir) __P((DB_ENV *, const char *)); + void (*set_errcall) __P((DB_ENV *, void (*)(const char *, char *))); + void (*set_errfile) __P((DB_ENV *, FILE *)); + void (*set_errpfx) __P((DB_ENV *, const char *)); + int (*set_feedback) __P((DB_ENV *, void (*)(DB_ENV *, int, int))); + int (*set_flags) __P((DB_ENV *, u_int32_t, int)); + int (*set_mutexlocks) __P((DB_ENV *, int)); + int (*set_paniccall) __P((DB_ENV *, void (*)(DB_ENV *, int))); + int (*set_recovery_init) __P((DB_ENV *, int (*)(DB_ENV *))); + int (*set_server) __P((DB_ENV *, char *, long, long, u_int32_t)); + int (*set_shm_key) __P((DB_ENV *, long)); + int (*set_tmp_dir) __P((DB_ENV *, const char *)); + int (*set_verbose) __P((DB_ENV *, u_int32_t, int)); + + int (*set_lg_bsize) __P((DB_ENV *, u_int32_t)); + int (*set_lg_dir) __P((DB_ENV *, const char *)); + int (*set_lg_max) __P((DB_ENV *, u_int32_t)); + + int (*set_lk_conflicts) __P((DB_ENV *, u_int8_t *, int)); + int (*set_lk_detect) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_locks) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_lockers) __P((DB_ENV *, u_int32_t)); + int (*set_lk_max_objects) __P((DB_ENV *, u_int32_t)); + + int (*set_mp_mmapsize) __P((DB_ENV *, size_t)); + int (*set_cachesize) __P((DB_ENV *, u_int32_t, u_int32_t, int)); + + int (*set_tx_max) __P((DB_ENV *, u_int32_t)); + int (*set_tx_recover) __P((DB_ENV *, + int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); + int (*set_tx_timestamp) __P((DB_ENV *, time_t *)); + +#ifdef CONFIG_TEST +#define DB_TEST_PREOPEN 1 /* before __os_open */ +#define DB_TEST_POSTOPEN 2 /* after __os_open */ +#define DB_TEST_POSTLOGMETA 3 /* after logging meta in btree */ +#define DB_TEST_POSTLOG 4 /* after logging all pages */ +#define DB_TEST_POSTSYNC 5 /* after syncing the log */ +#define DB_TEST_PRERENAME 6 /* before __os_rename */ +#define DB_TEST_POSTRENAME 7 /* after __os_rename */ + int test_abort; /* Abort value for testing. */ + int test_copy; /* Copy value for testing. */ +#endif + +#define DB_ENV_CDB 0x00001 /* DB_INIT_CDB. */ +#define DB_ENV_CDB_ALLDB 0x00002 /* CDB environment wide locking. */ +#define DB_ENV_CREATE 0x00004 /* DB_CREATE set. */ +#define DB_ENV_DBLOCAL 0x00008 /* DB_ENV allocated for private DB. */ +#define DB_ENV_LOCKDOWN 0x00010 /* DB_LOCKDOWN set. */ +#define DB_ENV_NOMMAP 0x00020 /* DB_NOMMAP set. */ +#define DB_ENV_OPEN_CALLED 0x00040 /* DBENV->open called (paths valid). */ +#define DB_ENV_PRIVATE 0x00080 /* DB_PRIVATE set. */ +#define DB_ENV_RPCCLIENT 0x00100 /* DB_CLIENT set. */ +#define DB_ENV_STANDALONE 0x00200 /* Test: freestanding environment. */ +#define DB_ENV_SYSTEM_MEM 0x00400 /* DB_SYSTEM_MEM set. */ +#define DB_ENV_THREAD 0x00800 /* DB_THREAD set. */ +#define DB_ENV_TXN_NOSYNC 0x01000 /* DB_TXN_NOSYNC set. */ +#define DB_ENV_USER_ALLOC 0x02000 /* User allocated the structure. */ + u_int32_t flags; /* Flags. */ +}; + +/******************************************************* + * Access methods. + *******************************************************/ +/* + * !!! + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ +typedef enum { + DB_BTREE=1, + DB_HASH, + DB_RECNO, + DB_QUEUE, + DB_UNKNOWN /* Figure it out on open. 
*/ +} DBTYPE; + +#define DB_BTREEVERSION 8 /* Current btree version. */ +#define DB_BTREEOLDVER 6 /* Oldest btree version supported. */ +#define DB_BTREEMAGIC 0x053162 + +#define DB_HASHVERSION 7 /* Current hash version. */ +#define DB_HASHOLDVER 4 /* Oldest hash version supported. */ +#define DB_HASHMAGIC 0x061561 + +#define DB_QAMVERSION 3 /* Current queue version. */ +#define DB_QAMOLDVER 1 /* Oldest queue version supported. */ +#define DB_QAMMAGIC 0x042253 + +#define DB_LOGVERSION 3 /* Current log version. */ +#define DB_LOGOLDVER 3 /* Oldest log version supported. */ +#define DB_LOGMAGIC 0x040988 + +/* + * DB access method and cursor operation values. Each value is an operation + * code to which additional bit flags are added. + */ +#define DB_AFTER 1 /* c_put() */ +#define DB_APPEND 2 /* put() */ +#define DB_BEFORE 3 /* c_put() */ +#define DB_CACHED_COUNTS 4 /* stat() */ +#define DB_CHECKPOINT 5 /* log_put(), log_get() */ +#define DB_CONSUME 6 /* get() */ +#define DB_CONSUME_WAIT 7 /* get() */ +#define DB_CURLSN 8 /* log_put() */ +#define DB_CURRENT 9 /* c_get(), c_put(), log_get() */ +#define DB_FIRST 10 /* c_get(), log_get() */ +#define DB_FLUSH 11 /* log_put() */ +#define DB_GET_BOTH 12 /* get(), c_get() */ +#define DB_GET_BOTHC 13 /* c_get() (internal) */ +#define DB_GET_RECNO 14 /* c_get() */ +#define DB_JOIN_ITEM 15 /* c_get(); do not do primary lookup */ +#define DB_KEYFIRST 16 /* c_put() */ +#define DB_KEYLAST 17 /* c_put() */ +#define DB_LAST 18 /* c_get(), log_get() */ +#define DB_NEXT 19 /* c_get(), log_get() */ +#define DB_NEXT_DUP 20 /* c_get() */ +#define DB_NEXT_NODUP 21 /* c_get() */ +#define DB_NODUPDATA 22 /* put(), c_put() */ +#define DB_NOOVERWRITE 23 /* put() */ +#define DB_NOSYNC 24 /* close() */ +#define DB_POSITION 25 /* c_dup() */ +#define DB_POSITIONI 26 /* c_dup() (internal) */ +#define DB_PREV 27 /* c_get(), log_get() */ +#define DB_PREV_NODUP 28 /* c_get(), log_get() */ +#define DB_RECORDCOUNT 29 /* stat() */ +#define DB_SET 30 /* c_get(), log_get() */ +#define DB_SET_RANGE 31 /* c_get() */ +#define DB_SET_RECNO 32 /* get(), c_get() */ +#define DB_WRITECURSOR 33 /* cursor() */ +#define DB_WRITELOCK 34 /* cursor() (internal) */ + +/* This has to change when the max opcode hits 255. */ +#define DB_OPFLAGS_MASK 0x000000ff /* Mask for operations flags. */ +#define DB_RMW 0x80000000 /* Acquire write flag immediately. */ + +/* + * DB (user visible) error return codes. + * + * !!! + * Changes to any of the user visible error return codes must be reflected + * in java/src/com/sleepycat/db/Db.java. + * + * !!! + * For source compatibility with DB 2.X deadlock return (EAGAIN), use the + * following: + * #include <errno.h> + * #define DB_LOCK_DEADLOCK EAGAIN + * + * !!! + * We don't want our error returns to conflict with other packages where + * possible, so pick a base error value that's hopefully not common. We + * document that we own the error name space from -30,800 to -30,999. + */ +/* Public error return codes. */ +#define DB_INCOMPLETE (-30999)/* Sync didn't finish. */ +#define DB_KEYEMPTY (-30998)/* Key/data deleted or never created. */ +#define DB_KEYEXIST (-30997)/* The key/data pair already exists. */ +#define DB_LOCK_DEADLOCK (-30996)/* Deadlock. */ +#define DB_LOCK_NOTGRANTED (-30995)/* Lock unavailable. */ +#define DB_NOSERVER (-30994)/* Server panic return. */ +#define DB_NOSERVER_HOME (-30993)/* Bad home sent to server. */ +#define DB_NOSERVER_ID (-30992)/* Bad ID sent to server. */ +#define DB_NOTFOUND (-30991)/* Key/data pair not found (EOF). 
*/ +#define DB_OLD_VERSION (-30990)/* Out-of-date version. */ +#define DB_RUNRECOVERY (-30989)/* Panic return. */ +#define DB_VERIFY_BAD (-30988)/* Verify failed; bad format. */ + +/* DB (private) error return codes. */ +#define DB_ALREADY_ABORTED (-30899) +#define DB_DELETED (-30898)/* Recovery file marked deleted. */ +#define DB_JAVA_CALLBACK (-30897)/* Exception during a java callback. */ +#define DB_NEEDSPLIT (-30896)/* Page needs to be split. */ +#define DB_SWAPBYTES (-30895)/* Database needs byte swapping. */ +#define DB_TXN_CKP (-30894)/* Encountered ckp record in log. */ +#define DB_VERIFY_FATAL (-30893)/* Fatal: DB->verify cannot proceed. */ + +#define DB_FILE_ID_LEN 20 /* DB file ID length. */ + +/* DB access method description structure. */ +struct __db { + /******************************************************* + * Public: owned by the application. + *******************************************************/ + u_int32_t pgsize; /* Database logical page size. */ + + /* Callbacks. */ + int (*db_append_recno) __P((DB *, DBT *, db_recno_t)); + void (*db_feedback) __P((DB *, int, int)); + void *(*db_malloc) __P((size_t)); + void *(*db_realloc) __P((void *, size_t)); + int (*dup_compare) __P((DB *, const DBT *, const DBT *)); + + void *app_private; /* Application-private handle. */ + + /******************************************************* + * Private: owned by DB. + *******************************************************/ + DB_ENV *dbenv; /* Backing environment. */ + + DBTYPE type; /* DB access method type. */ + + DB_MPOOLFILE *mpf; /* Backing buffer pool. */ + + void *mutexp; /* Synchronization for free threading */ + + u_int8_t fileid[DB_FILE_ID_LEN];/* File's unique ID for locking. */ + + u_int32_t adj_fileid; /* File's unique ID for curs. adj. */ + +#define DB_LOGFILEID_INVALID -1 + int32_t log_fileid; /* File's unique ID for logging. */ + db_pgno_t meta_pgno; /* Meta page number */ + DB_TXN *open_txn; /* Transaction to protect creates. */ + + long cl_id; /* RPC: remote client id. */ + + /* + * !!! + * Some applications use DB but implement their own locking outside of + * DB. If they're using fcntl(2) locking on the underlying database + * file, and we open and close a file descriptor for that file, we will + * discard their locks. The DB_FCNTL_LOCKING flag to DB->open is an + * undocumented interface to support this usage which leaves any file + * descriptors we open until DB->close. This will only work with the + * DB->open interface and simple caches, e.g., creating a transaction + * thread may open/close file descriptors this flag doesn't protect. + * Locking with fcntl(2) on a file that you don't own is a very, very + * unsafe thing to do. 'Nuff said. + */ + DB_FH *saved_open_fhp; /* Saved file handle. */ + + /* + * Linked list of DBP's, used in the log's dbentry table + * to keep track of all open db handles for a given log id. + * !!! + * Explicit representations of structures in queue.h. + * + * TAILQ_ENTRY(__db) links; + */ + struct { + struct __db *tqe_next; + struct __db **tqe_prev; + } links; + + /* + * Linked list of DBP's, linked from the DB_ENV, used to + * keep track of all open db handles for cursor adjustment. + * + * XXX + * Eventually, this should be merged with "links" above. + * + * !!! + * Explicit representations of structures in queue.h. + * + * LIST_ENTRY(__db) dblistlinks; + */ + struct { + struct __db *le_next; + struct __db **le_prev; + } dblistlinks; + + /* + * Cursor queues. + * + * !!! + * Explicit representations of structures in queue.h. 
+ * + * TAILQ_HEAD(free_queue, __dbc); + * TAILQ_HEAD(active_queue, __dbc); + * TAILQ_HEAD(join_queue, __dbc); + */ + struct { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } free_queue; + struct { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } active_queue; + struct { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } join_queue; + + void *bt_internal; /* Btree/Recno access method private. */ + void *cj_internal; /* C++/Java private. */ + void *h_internal; /* Hash access method private. */ + void *q_internal; /* Queue access method private. */ + void *xa_internal; /* XA private. */ + + /* Methods. */ + int (*close) __P((DB *, u_int32_t)); + int (*cursor) __P((DB *, DB_TXN *, DBC **, u_int32_t)); + int (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t)); + void (*err) __P((DB *, int, const char *, ...)); + void (*errx) __P((DB *, const char *, ...)); + int (*fd) __P((DB *, int *)); + int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*get_byteswapped) __P((DB *)); + DBTYPE + (*get_type) __P((DB *)); + int (*join) __P((DB *, DBC **, DBC **, u_int32_t)); + int (*key_range) __P((DB *, + DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); + int (*open) __P((DB *, + const char *, const char *, DBTYPE, u_int32_t, int)); + int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*remove) __P((DB *, const char *, const char *, u_int32_t)); + int (*rename) __P((DB *, + const char *, const char *, const char *, u_int32_t)); + int (*set_append_recno) __P((DB *, int (*)(DB *, DBT *, db_recno_t))); + int (*set_cachesize) __P((DB *, u_int32_t, u_int32_t, int)); + int (*set_dup_compare) __P((DB *, + int (*)(DB *, const DBT *, const DBT *))); + void (*set_errcall) __P((DB *, void (*)(const char *, char *))); + void (*set_errfile) __P((DB *, FILE *)); + void (*set_errpfx) __P((DB *, const char *)); + int (*set_feedback) __P((DB *, void (*)(DB *, int, int))); + int (*set_flags) __P((DB *, u_int32_t)); + int (*set_lorder) __P((DB *, int)); + int (*set_malloc) __P((DB *, void *(*)(size_t))); + int (*set_pagesize) __P((DB *, u_int32_t)); + int (*set_paniccall) __P((DB *, void (*)(DB_ENV *, int))); + int (*set_realloc) __P((DB *, void *(*)(void *, size_t))); + int (*stat) __P((DB *, void *, void *(*)(size_t), u_int32_t)); + int (*sync) __P((DB *, u_int32_t)); + int (*upgrade) __P((DB *, const char *, u_int32_t)); + int (*verify) __P((DB *, + const char *, const char *, FILE *, u_int32_t)); + + int (*set_bt_compare) __P((DB *, + int (*)(DB *, const DBT *, const DBT *))); + int (*set_bt_maxkey) __P((DB *, u_int32_t)); + int (*set_bt_minkey) __P((DB *, u_int32_t)); + int (*set_bt_prefix) __P((DB *, + size_t (*)(DB *, const DBT *, const DBT *))); + + int (*set_h_ffactor) __P((DB *, u_int32_t)); + int (*set_h_hash) __P((DB *, + u_int32_t (*)(DB *, const void *, u_int32_t))); + int (*set_h_nelem) __P((DB *, u_int32_t)); + + int (*set_re_delim) __P((DB *, int)); + int (*set_re_len) __P((DB *, u_int32_t)); + int (*set_re_pad) __P((DB *, int)); + int (*set_re_source) __P((DB *, const char *)); + int (*set_q_extentsize) __P((DB *, u_int32_t)); + + int (*db_am_remove) __P((DB *, const char *, + const char *, DB_LSN *, int (**)(DB *, void*), void **)); + int (*db_am_rename) __P((DB *, + const char *, const char *, const char *)); + +#define DB_OK_BTREE 0x01 +#define DB_OK_HASH 0x02 +#define DB_OK_QUEUE 0x04 +#define DB_OK_RECNO 0x08 + u_int32_t am_ok; /* Legal AM choices. */ + +#define DB_AM_DISCARD 0x00001 /* Discard any cached pages. */ +#define DB_AM_DUP 0x00002 /* DB_DUP. 
*/ +#define DB_AM_DUPSORT 0x00004 /* DB_DUPSORT. */ +#define DB_AM_INMEM 0x00008 /* In-memory; no sync on close. */ +#define DB_AM_PGDEF 0x00010 /* Page size was defaulted. */ +#define DB_AM_RDONLY 0x00020 /* Database is readonly. */ +#define DB_AM_RECOVER 0x00040 /* DBP opened by recovery routine. */ +#define DB_AM_SUBDB 0x00080 /* Subdatabases supported. */ +#define DB_AM_SWAP 0x00100 /* Pages need to be byte-swapped. */ +#define DB_AM_TXN 0x00200 /* DBP was in a transaction. */ +#define DB_AM_VERIFYING 0x00400 /* DB handle is in the verifier. */ +#define DB_BT_RECNUM 0x00800 /* DB_RECNUM. */ +#define DB_BT_REVSPLIT 0x01000 /* DB_REVSPLITOFF. */ +#define DB_DBM_ERROR 0x02000 /* Error in DBM/NDBM database. */ +#define DB_OPEN_CALLED 0x04000 /* DB->open called. */ +#define DB_RE_DELIMITER 0x08000 /* Variablen length delimiter set. */ +#define DB_RE_FIXEDLEN 0x10000 /* Fixed-length records. */ +#define DB_RE_PAD 0x20000 /* Fixed-length record pad. */ +#define DB_RE_RENUMBER 0x40000 /* DB_RENUMBER. */ +#define DB_RE_SNAPSHOT 0x80000 /* DB_SNAPSHOT. */ + u_int32_t flags; +}; + +/* + * DB_LOCK_ILOCK -- + * Internal DB access method lock. + */ +struct __db_ilock { + db_pgno_t pgno; /* Page being locked. */ + u_int8_t fileid[DB_FILE_ID_LEN];/* File id. */ +#define DB_RECORD_LOCK 1 +#define DB_PAGE_LOCK 2 + u_int8_t type; /* Record or Page lock */ +}; + +/* + * DB_LOCK -- + * The structure is allocated by the caller and filled in during a + * lock_get request (or a lock_vec/DB_LOCK_GET). + */ +struct __db_lock_u { + size_t off; /* Offset of the lock in the region */ + u_int32_t ndx; /* Index of the object referenced by + * this lock; used for locking. */ + u_int32_t gen; /* Generation number of this lock. */ +}; + +/* Cursor description structure. */ +struct __dbc { + DB *dbp; /* Related DB access method. */ + DB_TXN *txn; /* Associated transaction. */ + + /* + * !!! + * Explicit representations of structures in queue.h. + * + * TAILQ_ENTRY(__dbc) links; Active/free cursor queues. + */ + struct { + DBC *tqe_next; + DBC **tqe_prev; + } links; + + DBT rkey; /* Returned key. */ + DBT rdata; /* Returned data. */ + + u_int32_t lid; /* Default process' locker id. */ + u_int32_t locker; /* Locker for this operation. */ + DBT lock_dbt; /* DBT referencing lock. */ + DB_LOCK_ILOCK lock; /* Object to be locked. */ + DB_LOCK mylock; /* Lock held on this cursor. */ + + long cl_id; /* Remote client id. */ + + DBTYPE dbtype; /* Cursor type. */ + + DBC_INTERNAL *internal; /* Access method private. */ + + int (*c_close) __P((DBC *)); /* Methods: public. */ + int (*c_count) __P((DBC *, db_recno_t *, u_int32_t)); + int (*c_del) __P((DBC *, u_int32_t)); + int (*c_dup) __P((DBC *, DBC **, u_int32_t)); + int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t)); + + /* Methods: private. */ + int (*c_am_close) __P((DBC *, db_pgno_t, int *)); + int (*c_am_del) __P((DBC *)); + int (*c_am_destroy) __P((DBC *)); + int (*c_am_get) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + int (*c_am_put) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); + int (*c_am_writelock) __P((DBC *)); + +#define DBC_ACTIVE 0x001 /* Cursor is being used. */ +#define DBC_OPD 0x002 /* Cursor references off-page dups. */ +#define DBC_RECOVER 0x004 /* Cursor created by recovery routine + * (do not log or lock). + */ +#define DBC_RMW 0x008 /* Acquire write flag in read op. */ +#define DBC_WRITECURSOR 0x010 /* Cursor may be used to write (CDB). 
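 *
 * [Editorial aside, not part of the original header.]  A hedged sketch of
 * the usual cursor walk assembled from the methods above (DB->cursor,
 * c_get, c_close) and the DB_NEXT / DB_NOTFOUND values defined earlier in
 * this file; variable names are illustrative only:
 *
 *	DBC *dbc;
 *	DBT key, data;
 *	int ret;
 *
 *	memset(&key, 0, sizeof(key));
 *	memset(&data, 0, sizeof(data));
 *	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
 *		goto err;
 *	while ((ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0)
 *		;			(process one key/data pair here)
 *	if (ret == DB_NOTFOUND)
 *		ret = 0;		(normal end of the database)
 *	(void)dbc->c_close(dbc);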
*/ +#define DBC_WRITEDUP 0x020 /* idup'ed DBC_WRITECURSOR (CDB). */ +#define DBC_WRITER 0x040 /* Cursor immediately writing (CDB). */ +#define DBC_TRANSIENT 0x080 /* Cursor is transient. */ +#define DBC_COMPENSATE 0x100 /* Cursor is doing compensation + * do not lock. + */ + u_int32_t flags; +}; + +/* Key range statistics structure */ +struct __key_range { + double less; + double equal; + double greater; +}; + +/* Btree/Recno statistics structure. */ +struct __db_bt_stat { + u_int32_t bt_magic; /* Magic number. */ + u_int32_t bt_version; /* Version number. */ + u_int32_t bt_metaflags; /* Metadata flags. */ + u_int32_t bt_nkeys; /* Number of unique keys. */ + u_int32_t bt_ndata; /* Number of data items. */ + u_int32_t bt_pagesize; /* Page size. */ + u_int32_t bt_maxkey; /* Maxkey value. */ + u_int32_t bt_minkey; /* Minkey value. */ + u_int32_t bt_re_len; /* Fixed-length record length. */ + u_int32_t bt_re_pad; /* Fixed-length record pad. */ + u_int32_t bt_levels; /* Tree levels. */ + u_int32_t bt_int_pg; /* Internal pages. */ + u_int32_t bt_leaf_pg; /* Leaf pages. */ + u_int32_t bt_dup_pg; /* Duplicate pages. */ + u_int32_t bt_over_pg; /* Overflow pages. */ + u_int32_t bt_free; /* Pages on the free list. */ + u_int32_t bt_int_pgfree; /* Bytes free in internal pages. */ + u_int32_t bt_leaf_pgfree; /* Bytes free in leaf pages. */ + u_int32_t bt_dup_pgfree; /* Bytes free in duplicate pages. */ + u_int32_t bt_over_pgfree; /* Bytes free in overflow pages. */ +}; + +/* Queue statistics structure. */ +struct __db_qam_stat { + u_int32_t qs_magic; /* Magic number. */ + u_int32_t qs_version; /* Version number. */ + u_int32_t qs_metaflags; /* Metadata flags. */ + u_int32_t qs_nkeys; /* Number of unique keys. */ + u_int32_t qs_ndata; /* Number of data items. */ + u_int32_t qs_pagesize; /* Page size. */ + u_int32_t qs_pages; /* Data pages. */ + u_int32_t qs_re_len; /* Fixed-length record length. */ + u_int32_t qs_re_pad; /* Fixed-length record pad. */ + u_int32_t qs_pgfree; /* Bytes free in data pages. */ + u_int32_t qs_first_recno; /* First not deleted record. */ + u_int32_t qs_cur_recno; /* Last allocated record number. */ +}; + +/* Hash statistics structure. */ +struct __db_h_stat { + u_int32_t hash_magic; /* Magic number. */ + u_int32_t hash_version; /* Version number. */ + u_int32_t hash_metaflags; /* Metadata flags. */ + u_int32_t hash_nkeys; /* Number of unique keys. */ + u_int32_t hash_ndata; /* Number of data items. */ + u_int32_t hash_pagesize; /* Page size. */ + u_int32_t hash_nelem; /* Original nelem specified. */ + u_int32_t hash_ffactor; /* Fill factor specified at create. */ + u_int32_t hash_buckets; /* Number of hash buckets. */ + u_int32_t hash_free; /* Pages on the free list. */ + u_int32_t hash_bfree; /* Bytes free on bucket pages. */ + u_int32_t hash_bigpages; /* Number of big key/data pages. */ + u_int32_t hash_big_bfree; /* Bytes free on big item pages. */ + u_int32_t hash_overflows; /* Number of overflow pages. */ + u_int32_t hash_ovfl_free; /* Bytes free on ovfl pages. */ + u_int32_t hash_dup; /* Number of dup pages. */ + u_int32_t hash_dup_free; /* Bytes free on duplicate pages. 
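 *
 * [Editorial aside, not part of the original header.]  These statistics
 * structures are returned by the DB->stat method declared earlier; as we
 * understand the interface, the method allocates the structure for the
 * caller (through the allocator argument, or a default when it is NULL).
 * A hedged sketch for a btree database, names ours:
 *
 *	struct __db_bt_stat *sp;
 *	int ret;
 *
 *	if ((ret = dbp->stat(dbp, &sp, NULL, 0)) == 0) {
 *		printf("keys %lu, leaf pages %lu\n",
 *		    (u_long)sp->bt_nkeys, (u_long)sp->bt_leaf_pg);
 *		free(sp);
 *	}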
*/ +}; + +int db_create __P((DB **, DB_ENV *, u_int32_t)); +int db_env_create __P((DB_ENV **, u_int32_t)); +int db_env_set_func_close __P((int (*)(int))); +int db_env_set_func_dirfree __P((void (*)(char **, int))); +int db_env_set_func_dirlist __P((int (*)(const char *, char ***, int *))); +int db_env_set_func_exists __P((int (*)(const char *, int *))); +int db_env_set_func_free __P((void (*)(void *))); +int db_env_set_func_fsync __P((int (*)(int))); +int db_env_set_func_ioinfo __P((int (*)(const char *, + int, u_int32_t *, u_int32_t *, u_int32_t *))); +int db_env_set_func_malloc __P((void *(*)(size_t))); +int db_env_set_func_map __P((int (*)(char *, size_t, int, int, void **))); +int db_env_set_func_open __P((int (*)(const char *, int, ...))); +int db_env_set_func_read __P((ssize_t (*)(int, void *, size_t))); +int db_env_set_func_realloc __P((void *(*)(void *, size_t))); +int db_env_set_func_rename __P((int (*)(const char *, const char *))); +int db_env_set_func_seek + __P((int (*)(int, size_t, db_pgno_t, u_int32_t, int, int))); +int db_env_set_func_sleep __P((int (*)(u_long, u_long))); +int db_env_set_func_unlink __P((int (*)(const char *))); +int db_env_set_func_unmap __P((int (*)(void *, size_t))); +int db_env_set_func_write __P((ssize_t (*)(int, const void *, size_t))); +int db_env_set_func_yield __P((int (*)(void))); +int db_env_set_pageyield __P((int)); +int db_env_set_panicstate __P((int)); +int db_env_set_region_init __P((int)); +int db_env_set_tas_spins __P((u_int32_t)); +char *db_strerror __P((int)); +char *db_version __P((int *, int *, int *)); + +/******************************************************* + * Locking + *******************************************************/ +#define DB_LOCKVERSION 1 + +/* Flag values for lock_vec(), lock_get(). */ +#define DB_LOCK_NOWAIT 0x01 /* Don't wait on unavailable lock. */ +#define DB_LOCK_RECORD 0x02 /* Internal: record lock. */ +#define DB_LOCK_UPGRADE 0x04 /* Internal: upgrade existing lock. */ +#define DB_LOCK_SWITCH 0x08 /* Internal: switch existing lock. */ + +/* Flag values for lock_detect(). */ +#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */ + +/* + * Request types. + * + * !!! + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ +typedef enum { + DB_LOCK_DUMP=0, /* Display held locks. */ + DB_LOCK_GET, /* Get the lock. */ + DB_LOCK_INHERIT, /* Pass locks to parent. */ + DB_LOCK_PUT, /* Release the lock. */ + DB_LOCK_PUT_ALL, /* Release locker's locks. */ + DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */ +} db_lockop_t; + +/* + * Simple R/W lock modes and for multi-granularity intention locking. + * + * !!! + * These values are NOT random, as they are used as an index into the lock + * conflicts arrays, i.e., DB_LOCK_IWRITE must be == 3, and DB_LOCK_IREAD + * must be == 4. + * + * !!! + * Changes here must be reflected in java/src/com/sleepycat/db/Db.java. + */ +typedef enum { + DB_LOCK_NG=0, /* Not granted. */ + DB_LOCK_READ, /* Shared/read. */ + DB_LOCK_WRITE, /* Exclusive/write. */ + DB_LOCK_WAIT, /* Wait for event */ + DB_LOCK_IWRITE, /* Intent exclusive/write. */ + DB_LOCK_IREAD, /* Intent to share/read. */ + DB_LOCK_IWR /* Intent to read and write. */ +} db_lockmode_t; + +/* + * Status of a lock. + */ +typedef enum { + DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */ + DB_LSTAT_ERR, /* Lock is bad. */ + DB_LSTAT_FREE, /* Lock is unallocated. */ + DB_LSTAT_HELD, /* Lock is currently held. */ + DB_LSTAT_NOGRANT, /* Lock was not granted. 
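 *
 * [Editorial aside, not part of the original header.]  The lock_id,
 * lock_get and lock_put routines prototyped just below use these modes; the
 * object being locked is an arbitrary application byte string described by
 * a DBT.  A hedged sketch, names ours:
 *
 *	DB_LOCK lock;
 *	DBT obj;
 *	u_int32_t locker;
 *	int ret;
 *
 *	if ((ret = lock_id(dbenv, &locker)) != 0)
 *		goto err;
 *	memset(&obj, 0, sizeof(obj));
 *	obj.data = "my-resource";	(any application-chosen byte string)
 *	obj.size = sizeof("my-resource") - 1;
 *	if ((ret = lock_get(dbenv,
 *	    locker, 0, &obj, DB_LOCK_READ, &lock)) != 0)
 *		goto err;
 *	(critical section protected by the shared lock)
 *	(void)lock_put(dbenv, &lock);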
*/ + DB_LSTAT_PENDING, /* Lock was waiting and has been + * promoted; waiting for the owner + * to run and upgrade it to held. */ + DB_LSTAT_WAITING /* Lock is on the wait queue. */ +} db_status_t; + +/* Lock request structure. */ +struct __db_lockreq { + db_lockop_t op; /* Operation. */ + db_lockmode_t mode; /* Requested mode. */ + u_int32_t locker; /* Locker identity. */ + DBT *obj; /* Object being locked. */ + DB_LOCK lock; /* Lock returned. */ +}; + +/* + * Commonly used conflict matrices. + * + */ + +/* Multi-granularity locking. */ +#define DB_LOCK_RIW_N 7 +extern const u_int8_t db_riw_conflicts[]; + +struct __db_lock_stat { + u_int32_t st_lastid; /* Last allocated locker ID. */ + u_int32_t st_maxlocks; /* Maximum number of locks in table. */ + u_int32_t st_maxlockers; /* Maximum number of lockers in table. */ + u_int32_t st_maxobjects; /* Maximum number of objects in table. */ + u_int32_t st_nmodes; /* Number of lock modes. */ + u_int32_t st_nlocks; /* Current number of locks. */ + u_int32_t st_maxnlocks; /* Maximum number of locks so far. */ + u_int32_t st_nlockers; /* Current number of lockers. */ + u_int32_t st_maxnlockers; /* Maximum number of lockers so far. */ + u_int32_t st_nobjects; /* Current number of objects. */ + u_int32_t st_maxnobjects; /* Maximum number of objects so far. */ + u_int32_t st_nconflicts; /* Number of lock conflicts. */ + u_int32_t st_nrequests; /* Number of lock gets. */ + u_int32_t st_nreleases; /* Number of lock puts. */ + u_int32_t st_nnowaits; /* Number of requests that would have + waited, but NOWAIT was set. */ + u_int32_t st_ndeadlocks; /* Number of lock deadlocks. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_regsize; /* Region size. */ +}; + +int lock_detect __P((DB_ENV *, u_int32_t, u_int32_t, int *)); +int lock_get __P((DB_ENV *, + u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); +int lock_id __P((DB_ENV *, u_int32_t *)); +int lock_put __P((DB_ENV *, DB_LOCK *)); +int lock_stat __P((DB_ENV *, DB_LOCK_STAT **, void *(*)(size_t))); +int lock_vec __P((DB_ENV *, + u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); + +/******************************************************* + * Logging. + *******************************************************/ +/* Flag values for log_archive(). */ +#define DB_ARCH_ABS 0x001 /* Absolute pathnames. */ +#define DB_ARCH_DATA 0x002 /* Data files. */ +#define DB_ARCH_LOG 0x004 /* Log files. */ + +/* + * A DB_LSN has two parts, a fileid which identifies a specific file, and an + * offset within that file. The fileid is an unsigned 4-byte quantity that + * uniquely identifies a file within the log directory -- currently a simple + * counter inside the log. The offset is also an unsigned 4-byte value. The + * log manager guarantees the offset is never more than 4 bytes by switching + * to a new log file before the maximum length imposed by an unsigned 4-byte + * offset is reached. + */ +struct __db_lsn { + u_int32_t file; /* File ID. */ + u_int32_t offset; /* File offset. */ +}; + +/* Log statistics structure. */ +struct __db_log_stat { + u_int32_t st_magic; /* Log file magic number. */ + u_int32_t st_version; /* Log file version number. */ + int st_mode; /* Log file mode. */ + u_int32_t st_lg_bsize; /* Log buffer size. */ + u_int32_t st_lg_max; /* Maximum log file size. */ + u_int32_t st_w_bytes; /* Bytes to log. */ + u_int32_t st_w_mbytes; /* Megabytes to log. 
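 *
 * [Editorial aside, not part of the original header.]  Because of the
 * file/offset split described above, two LSNs order first by file and then
 * by offset within the file; that is what the log_compare() routine
 * declared below expresses.  A hedged sketch of its use, names ours:
 *
 *	DB_LSN saved, current;
 *
 *	(fill in both LSNs, e.g. from log_put or log_get)
 *	if (log_compare(&saved, &current) < 0)
 *		;		(saved precedes current in the log)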
*/ + u_int32_t st_wc_bytes; /* Bytes to log since checkpoint. */ + u_int32_t st_wc_mbytes; /* Megabytes to log since checkpoint. */ + u_int32_t st_wcount; /* Total writes to the log. */ + u_int32_t st_wcount_fill; /* Overflow writes to the log. */ + u_int32_t st_scount; /* Total syncs to the log. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_cur_file; /* Current log file number. */ + u_int32_t st_cur_offset; /* Current log file offset. */ + u_int32_t st_regsize; /* Region size. */ +}; + +int log_archive __P((DB_ENV *, char **[], u_int32_t, void *(*)(size_t))); +int log_compare __P((const DB_LSN *, const DB_LSN *)); +int log_file __P((DB_ENV *, const DB_LSN *, char *, size_t)); +int log_flush __P((DB_ENV *, const DB_LSN *)); +int log_get __P((DB_ENV *, DB_LSN *, DBT *, u_int32_t)); +int log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); +int log_register __P((DB_ENV *, DB *, const char *)); +int log_stat __P((DB_ENV *, DB_LOG_STAT **, void *(*)(size_t))); +int log_unregister __P((DB_ENV *, DB *)); + +/******************************************************* + * Mpool + *******************************************************/ +/* Flag values for memp_fget(). */ +#define DB_MPOOL_CREATE 0x001 /* Create a page. */ +#define DB_MPOOL_LAST 0x002 /* Return the last page. */ +#define DB_MPOOL_NEW 0x004 /* Create a new page. */ +#define DB_MPOOL_NEW_GROUP 0x008 /* Create a group of pages. */ +#define DB_MPOOL_EXTENT 0x010 /* Get for an extent. */ + +/* Flag values for memp_fput(), memp_fset(). */ +#define DB_MPOOL_CLEAN 0x001 /* Page is not modified. */ +#define DB_MPOOL_DIRTY 0x002 /* Page is modified. */ +#define DB_MPOOL_DISCARD 0x004 /* Don't cache the page. */ + +/* Mpool statistics structure. */ +struct __db_mpool_stat { + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. */ + u_int32_t st_ro_evict; /* Clean pages forced from the cache. */ + u_int32_t st_rw_evict; /* Dirty pages forced from the cache. */ + u_int32_t st_hash_buckets; /* Number of hash buckets. */ + u_int32_t st_hash_searches; /* Total hash chain searches. */ + u_int32_t st_hash_longest; /* Longest hash chain searched. */ + u_int32_t st_hash_examined; /* Total hash entries searched. */ + u_int32_t st_page_clean; /* Clean pages. */ + u_int32_t st_page_dirty; /* Dirty pages. */ + u_int32_t st_page_trickle; /* Pages written by memp_trickle. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_gbytes; /* Total cache size: GB. */ + u_int32_t st_bytes; /* Total cache size: B. */ + u_int32_t st_ncache; /* Number of caches. */ + u_int32_t st_regsize; /* Cache size. */ +}; + +/* Mpool file open information structure. */ +struct __db_mpool_finfo { + int ftype; /* File type. */ + DBT *pgcookie; /* Byte-string passed to pgin/pgout. */ + u_int8_t *fileid; /* Unique file ID. */ + int32_t lsn_offset; /* LSN offset in page. */ + u_int32_t clear_len; /* Cleared length on created pages. */ +}; + +/* Mpool file statistics structure. */ +struct __db_mpool_fstat { + char *file_name; /* File name. */ + size_t st_pagesize; /* Page size. 
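 *
 * [Editorial aside, not part of the original header.]  The DB_MPOOL_* flags
 * above modify the memp_fget/memp_fput calls of this section on an open
 * DB_MPOOLFILE.  A hedged sketch of pinning, dirtying and releasing one
 * page; variable names are ours:
 *
 *	db_pgno_t pgno;
 *	void *pagep;
 *	int ret;
 *
 *	pgno = 1;
 *	if ((ret = memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0)
 *		goto err;
 *	(modify the page image through pagep)
 *	if ((ret = memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
 *		goto err;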
*/ + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. */ +}; + +int memp_fclose __P((DB_MPOOLFILE *)); +int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *)); +int memp_fopen __P((DB_ENV *, const char *, + u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); +int memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t)); +int memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t)); +int memp_fsync __P((DB_MPOOLFILE *)); +int memp_register __P((DB_ENV *, int, + int (*)(DB_ENV *, db_pgno_t, void *, DBT *), + int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); +int memp_stat __P((DB_ENV *, + DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t))); +int memp_sync __P((DB_ENV *, DB_LSN *)); +int memp_trickle __P((DB_ENV *, int, int *)); + +/******************************************************* + * Transactions. + *******************************************************/ +#define DB_TXNVERSION 1 + +/* Operations values to the tx_recover() function. */ +#define DB_TXN_BACKWARD_ROLL 1 /* Read the log backwards. */ +#define DB_TXN_FORWARD_ROLL 2 /* Read the log forwards. */ +#define DB_TXN_OPENFILES 3 /* Read for open files. */ +#define DB_TXN_REDO 4 /* Redo the operation. */ +#define DB_TXN_UNDO 5 /* Undo the operation. */ + +/* Internal transaction status values. */ + +/* Transaction statistics structure. */ +struct __db_txn_active { + u_int32_t txnid; /* Transaction ID */ + u_int32_t parentid; /* Transaction ID of parent */ + DB_LSN lsn; /* Lsn of the begin record */ +}; + +struct __db_txn_stat { + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + DB_LSN st_pending_ckp; /* last checkpoint did not finish */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_maxtxns; /* maximum txns possible */ + u_int32_t st_naborts; /* number of aborted transactions */ + u_int32_t st_nbegins; /* number of begun transactions */ + u_int32_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + u_int32_t st_maxnactive; /* maximum active transactions */ + DB_TXN_ACTIVE + *st_txnarray; /* array of active transactions */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_regsize; /* Region size. */ +}; + +int txn_abort __P((DB_TXN *)); +int txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); +int txn_checkpoint __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); +int txn_commit __P((DB_TXN *, u_int32_t)); +u_int32_t txn_id __P((DB_TXN *)); +int txn_prepare __P((DB_TXN *)); +int txn_stat __P((DB_ENV *, DB_TXN_STAT **, void *(*)(size_t))); + +#ifndef DB_DBM_HSEARCH +#define DB_DBM_HSEARCH 0 /* No historic interfaces by default. */ +#endif +#if DB_DBM_HSEARCH != 0 +/******************************************************* + * Dbm/Ndbm historic interfaces. + *******************************************************/ +#define DBM_INSERT 0 /* Flags to dbm_store(). */ +#define DBM_REPLACE 1 + +/* + * The DB support for ndbm(3) always appends this suffix to the + * file name to avoid overwriting the user's original database. 
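 *
 * [Editorial aside, not part of the original header.]  In other words, with
 * the compatibility macros below and the usual <fcntl.h> open flags, a call
 * such as
 *
 *	DBM *dbm;
 *
 *	dbm = dbm_open("phones", O_CREAT | O_RDWR, 0664);
 *
 * operates on the file "phones.db" rather than on the historic
 * "phones.dir"/"phones.pag" pair; the underlying file is an ordinary
 * DB database.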
+ */ +#define DBM_SUFFIX ".db" + +#if defined(_XPG4_2) +typedef struct { + char *dptr; + size_t dsize; +} datum; +#else +typedef struct { + char *dptr; + int dsize; +} datum; +#endif + +/* + * Translate DBM calls into DB calls so that DB doesn't step on the + * application's name space. + * + * The global variables dbrdonly, dirf and pagf were not retained when 4BSD + * replaced the dbm interface with ndbm, and are not supported here. + */ +#define dbminit(a) __db_dbm_init(a) +#define dbmclose __db_dbm_close +#if !defined(__cplusplus) +#define delete(a) __db_dbm_delete(a) +#endif +#define fetch(a) __db_dbm_fetch(a) +#define firstkey __db_dbm_firstkey +#define nextkey(a) __db_dbm_nextkey(a) +#define store(a, b) __db_dbm_store(a, b) + +/* Prototype the DB calls. */ +int __db_dbm_close __P((void)); +int __db_dbm_dbrdonly __P((void)); +int __db_dbm_delete __P((datum)); +int __db_dbm_dirf __P((void)); +datum __db_dbm_fetch __P((datum)); +datum __db_dbm_firstkey __P((void)); +int __db_dbm_init __P((char *)); +datum __db_dbm_nextkey __P((datum)); +int __db_dbm_pagf __P((void)); +int __db_dbm_store __P((datum, datum)); + +/* + * Translate NDBM calls into DB calls so that DB doesn't step on the + * application's name space. + */ +#define dbm_clearerr(a) __db_ndbm_clearerr(a) +#define dbm_close(a) __db_ndbm_close(a) +#define dbm_delete(a, b) __db_ndbm_delete(a, b) +#define dbm_dirfno(a) __db_ndbm_dirfno(a) +#define dbm_error(a) __db_ndbm_error(a) +#define dbm_fetch(a, b) __db_ndbm_fetch(a, b) +#define dbm_firstkey(a) __db_ndbm_firstkey(a) +#define dbm_nextkey(a) __db_ndbm_nextkey(a) +#define dbm_open(a, b, c) __db_ndbm_open(a, b, c) +#define dbm_pagfno(a) __db_ndbm_pagfno(a) +#define dbm_rdonly(a) __db_ndbm_rdonly(a) +#define dbm_store(a, b, c, d) __db_ndbm_store(a, b, c, d) + +/* Prototype the DB calls. */ +int __db_ndbm_clearerr __P((DBM *)); +void __db_ndbm_close __P((DBM *)); +int __db_ndbm_delete __P((DBM *, datum)); +int __db_ndbm_dirfno __P((DBM *)); +int __db_ndbm_error __P((DBM *)); +datum __db_ndbm_fetch __P((DBM *, datum)); +datum __db_ndbm_firstkey __P((DBM *)); +datum __db_ndbm_nextkey __P((DBM *)); +DBM *__db_ndbm_open __P((const char *, int, int)); +int __db_ndbm_pagfno __P((DBM *)); +int __db_ndbm_rdonly __P((DBM *)); +int __db_ndbm_store __P((DBM *, datum, datum, int)); + +/******************************************************* + * Hsearch historic interface. + *******************************************************/ +typedef enum { + FIND, ENTER +} ACTION; + +typedef struct entry { + char *key; + char *data; +} ENTRY; + +/* + * Translate HSEARCH calls into DB calls so that DB doesn't step on the + * application's name space. + */ +#define hcreate(a) __db_hcreate(a) +#define hdestroy __db_hdestroy +#define hsearch(a, b) __db_hsearch(a, b) + +/* Prototype the DB calls. */ +int __db_hcreate __P((size_t)); +void __db_hdestroy __P((void)); +ENTRY *__db_hsearch __P((ENTRY, ACTION)); +#endif /* DB_DBM_HSEARCH */ + +/* + * XXX + * MacOS: Reset Metrowerks C enum sizes. + */ +#ifdef __MWERKS__ +#pragma enumsalwaysint reset +#endif + +#if defined(__cplusplus) +} +#endif + +#endif /* !_DB_H_ */ diff --git a/bdb/include/db_185.h b/bdb/include/db_185.h new file mode 100644 index 00000000000..e50ebb0adb8 --- /dev/null +++ b/bdb/include/db_185.h @@ -0,0 +1,175 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: db_185.h,v 11.4 2000/02/14 02:59:54 bostic Exp $ + */ + +#ifndef _DB_185_H_ +#define _DB_185_H_ + +#include <sys/types.h> + +#include <limits.h> + +/* + * XXX + * Handle function prototypes and the keyword "const". This steps on name + * space that DB doesn't control, but all of the other solutions are worse. + */ +#undef __P +#if defined(__STDC__) || defined(__cplusplus) +#define __P(protos) protos /* ANSI C prototypes */ +#else +#define const +#define __P(protos) () /* K&R C preprocessor */ +#endif + +#define RET_ERROR -1 /* Return values. */ +#define RET_SUCCESS 0 +#define RET_SPECIAL 1 + +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ +@u_int8_decl@ +@int16_decl@ +@u_int16_decl@ +@int32_decl@ +@u_int32_decl@ +#endif + +/* + * XXX + * SGI/IRIX already has a pgno_t. + */ +#ifdef sgi +#define pgno_t db_pgno_t +#endif + +#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */ +typedef u_int32_t pgno_t; +#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */ +typedef u_int16_t indx_t; +#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */ +typedef u_int32_t recno_t; + +/* Key/data structure -- a Data-Base Thang. */ +typedef struct { + void *data; /* data */ + size_t size; /* data length */ +} DBT; + +/* Routine flags. */ +#define R_CURSOR 1 /* del, put, seq */ +#define __R_UNUSED 2 /* UNUSED */ +#define R_FIRST 3 /* seq */ +#define R_IAFTER 4 /* put (RECNO) */ +#define R_IBEFORE 5 /* put (RECNO) */ +#define R_LAST 6 /* seq (BTREE, RECNO) */ +#define R_NEXT 7 /* seq */ +#define R_NOOVERWRITE 8 /* put */ +#define R_PREV 9 /* seq (BTREE, RECNO) */ +#define R_SETCURSOR 10 /* put (RECNO) */ +#define R_RECNOSYNC 11 /* sync (RECNO) */ + +typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; + +/* Access method description structure. */ +typedef struct __db { + DBTYPE type; /* Underlying db type. 
*/ + int (*close) __P((struct __db *)); + int (*del) __P((const struct __db *, const DBT *, u_int)); + int (*get) __P((const struct __db *, const DBT *, DBT *, u_int)); + int (*put) __P((const struct __db *, DBT *, const DBT *, u_int)); + int (*seq) __P((const struct __db *, DBT *, DBT *, u_int)); + int (*sync) __P((const struct __db *, u_int)); + void *internal; /* Access method private. */ + int (*fd) __P((const struct __db *)); +} DB; + +#define BTREEMAGIC 0x053162 +#define BTREEVERSION 3 + +/* Structure used to pass parameters to the btree routines. */ +typedef struct { +#define R_DUP 0x01 /* duplicate keys */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t maxkeypage; /* maximum keys per page */ + u_int32_t minkeypage; /* minimum keys per page */ + u_int32_t psize; /* page size */ + int (*compare) /* comparison function */ + __P((const DBT *, const DBT *)); + size_t (*prefix) /* prefix function */ + __P((const DBT *, const DBT *)); + int lorder; /* byte order */ +} BTREEINFO; + +#define HASHMAGIC 0x061561 +#define HASHVERSION 2 + +/* Structure used to pass parameters to the hashing routines. */ +typedef struct { + u_int32_t bsize; /* bucket size */ + u_int32_t ffactor; /* fill factor */ + u_int32_t nelem; /* number of elements */ + u_int32_t cachesize; /* bytes to cache */ + u_int32_t /* hash function */ + (*hash) __P((const void *, size_t)); + int lorder; /* byte order */ +} HASHINFO; + +/* Structure used to pass parameters to the record routines. */ +typedef struct { +#define R_FIXEDLEN 0x01 /* fixed-length records */ +#define R_NOKEY 0x02 /* key not required */ +#define R_SNAPSHOT 0x04 /* snapshot the input */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t psize; /* page size */ + int lorder; /* byte order */ + size_t reclen; /* record length (fixed-length records) */ + u_char bval; /* delimiting byte (variable-length records */ + char *bfname; /* btree file name */ +} RECNOINFO; + +#if defined(__cplusplus) +extern "C" { +#endif +#define dbopen __db185_open +DB *__db185_open __P((const char *, int, int, DBTYPE, const void *)); + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_185_H_ */ diff --git a/bdb/include/db_am.h b/bdb/include/db_am.h new file mode 100644 index 00000000000..3a41eb3bbfd --- /dev/null +++ b/bdb/include/db_am.h @@ -0,0 +1,131 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_am.h,v 11.21 2000/12/12 17:43:56 bostic Exp $ + */ +#ifndef _DB_AM_H_ +#define _DB_AM_H_ + +#define DB_MINPAGECACHE 10 /* Min pages access methods cache. */ + +/* DB recovery operation codes. The low bits used to have flags or'd in. */ +#define DB_ADD_DUP 0x10 +#define DB_REM_DUP 0x20 +#define DB_ADD_BIG 0x30 +#define DB_REM_BIG 0x40 +#define DB_UNUSED_1 0x50 +#define DB_UNUSED_2 0x60 +#define DB_ADD_PAGE 0x70 +#define DB_REM_PAGE 0x80 + +/* + * This is a grotesque naming hack. We have modified the btree page + * allocation and freeing functions to be generic and have therefore + * moved them into the access-method independent portion of the code. + * However, since we didn't want to create new log records and routines + * for them, we left their logging and recovery functions over in btree. + * To make the code look prettier, we macro them, but this is sure to + * confuse the heck out of everyone. 
+ */ +#define __db_pg_alloc_log __bam_pg_alloc_log +#define __db_pg_free_log __bam_pg_free_log + +/* + * Standard initialization and shutdown macros for all recovery functions. + * + * Requires the following local variables: + * + * DB *file_dbp; + * DB_MPOOLFILE *mpf; + * int ret; + */ +#define REC_INTRO(func, inc_count) { \ + file_dbp = NULL; \ + dbc = NULL; \ + if ((ret = func(dbenv, dbtp->data, &argp)) != 0) \ + goto out; \ + if ((ret = __db_fileid_to_db(dbenv, \ + &file_dbp, argp->fileid, inc_count)) != 0) { \ + if (ret == DB_DELETED) { \ + ret = 0; \ + goto done; \ + } \ + goto out; \ + } \ + if (file_dbp == NULL) \ + goto out; \ + if ((ret = file_dbp->cursor(file_dbp, NULL, &dbc, 0)) != 0) \ + goto out; \ + F_SET(dbc, DBC_RECOVER); \ + mpf = file_dbp->mpf; \ +} + +#define REC_CLOSE { \ + int __t_ret; \ + if (argp != NULL) \ + __os_free(argp, sizeof(*argp)); \ + if (dbc != NULL && (__t_ret = dbc->c_close(dbc)) != 0 && ret == 0) \ + return (__t_ret); \ + return (ret); \ +} + +/* + * No-op versions of the same macros. + */ +#define REC_NOOP_INTRO(func) { \ + if ((ret = func(dbenv, dbtp->data, &argp)) != 0) \ + return (ret); \ +} +#define REC_NOOP_CLOSE \ + if (argp != NULL) \ + __os_free(argp, sizeof(*argp)); \ + return (ret); \ + +/* + * Standard debugging macro for all recovery functions. + */ +#ifdef DEBUG_RECOVER +#define REC_PRINT(func) \ + (void)func(dbenv, dbtp, lsnp, op, info); +#else +#define REC_PRINT(func) +#endif + +/* + * Flags to __db_lget + */ +#define LCK_COUPLE 0x01 /* Lock Couple */ +#define LCK_ALWAYS 0x02 /* Lock even for off page dup cursors */ +#define LCK_ROLLBACK 0x04 /* Lock even if in rollback */ + +/* + * If doing transactions we have to hold the locks associated with a data item + * from a page for the entire transaction. However, we don't have to hold the + * locks associated with walking the tree. Distinguish between the two so that + * we don't tie up the internal pages of the tree longer than necessary. + */ +#define __LPUT(dbc, lock) \ + (lock.off != LOCK_INVALID ? \ + lock_put((dbc)->dbp->dbenv, &(lock)) : 0) +#define __TLPUT(dbc, lock) \ + (lock.off != LOCK_INVALID && \ + (dbc)->txn == NULL ? lock_put((dbc)->dbp->dbenv, &(lock)) : 0) + +#ifdef DIAGNOSTIC +#define DB_CHECK_TXN(dbp, txn) \ + if (txn != NULL) \ + F_SET(dbp, DB_AM_TXN); \ + else if (F_ISSET(dbp, DB_AM_TXN)) \ + return (__db_missing_txn_err((dbp)->dbenv)); +#else +#define DB_CHECK_TXN(dbp, txn) +#endif + +#include "db_dispatch.h" +#include "db_auto.h" +#include "crdel_auto.h" +#include "db_ext.h" +#endif diff --git a/bdb/include/db_auto.h b/bdb/include/db_auto.h new file mode 100644 index 00000000000..88bf7419bea --- /dev/null +++ b/bdb/include/db_auto.h @@ -0,0 +1,140 @@ +/* Do not edit: automatically built by gen_rec.awk. 
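 *
 * [Editorial aside, not part of the original header.]  The REC_INTRO and
 * REC_CLOSE macros defined in db_am.h above assume a recovery function of
 * roughly the following shape, matching the *_recover prototypes in
 * db_ext.h; every name carrying the "foo" prefix here is purely
 * illustrative:
 *
 *	int
 *	__foo_recover(dbenv, dbtp, lsnp, op, info)
 *		DB_ENV *dbenv;
 *		DBT *dbtp;
 *		DB_LSN *lsnp;
 *		db_recops op;
 *		void *info;
 *	{
 *		__foo_args *argp;
 *		DB *file_dbp;
 *		DBC *dbc;
 *		DB_MPOOLFILE *mpf;
 *		int ret;
 *
 *		REC_PRINT(__foo_print);
 *		REC_INTRO(__foo_read, 1);
 *
 *		(redo or undo the operation described by argp, using mpf)
 *
 *	done:	*lsnp = argp->prev_lsn;
 *		ret = 0;
 *	out:	REC_CLOSE;
 *	}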
*/ + +#ifndef db_AUTO_H +#define db_AUTO_H + +#define DB_db_addrem 41 +typedef struct _db_addrem_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + size_t nbytes; + DBT hdr; + DBT dbt; + DB_LSN pagelsn; +} __db_addrem_args; + +int __db_addrem_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_pgno_t, u_int32_t, size_t, const DBT *, const DBT *, DB_LSN *)); +int __db_addrem_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_addrem_read __P((DB_ENV *, void *, __db_addrem_args **)); + +#define DB_db_split 42 +typedef struct _db_split_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + DBT pageimage; + DB_LSN pagelsn; +} __db_split_args; + +int __db_split_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_split_read __P((DB_ENV *, void *, __db_split_args **)); + +#define DB_db_big 43 +typedef struct _db_big_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + DBT dbt; + DB_LSN pagelsn; + DB_LSN prevlsn; + DB_LSN nextlsn; +} __db_big_args; + +int __db_big_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_pgno_t, db_pgno_t, db_pgno_t, const DBT *, DB_LSN *, DB_LSN *, DB_LSN *)); +int __db_big_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_read __P((DB_ENV *, void *, __db_big_args **)); + +#define DB_db_ovref 44 +typedef struct _db_ovref_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + int32_t adjust; + DB_LSN lsn; +} __db_ovref_args; + +int __db_ovref_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, int32_t, DB_LSN *)); +int __db_ovref_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_ovref_read __P((DB_ENV *, void *, __db_ovref_args **)); + +#define DB_db_relink 45 +typedef struct _db_relink_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t prev; + DB_LSN lsn_prev; + db_pgno_t next; + DB_LSN lsn_next; +} __db_relink_args; + +int __db_relink_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); +int __db_relink_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_read __P((DB_ENV *, void *, __db_relink_args **)); + +#define DB_db_addpage 46 +typedef struct _db_addpage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t nextpgno; + DB_LSN nextlsn; +} __db_addpage_args; + +int __db_addpage_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_addpage_read __P((DB_ENV *, void *, __db_addpage_args **)); + +#define DB_db_debug 47 +typedef struct _db_debug_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT op; + int32_t fileid; + DBT key; + DBT data; + u_int32_t arg_flags; +} __db_debug_args; + +int __db_debug_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, const DBT *, int32_t, const DBT *, const DBT *, u_int32_t)); +int __db_debug_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_debug_read __P((DB_ENV *, void *, __db_debug_args **)); + +#define DB_db_noop 48 +typedef struct _db_noop_args { + u_int32_t type; + DB_TXN *txnid; + 
DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN prevlsn; +} __db_noop_args; + +int __db_noop_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *)); +int __db_noop_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_noop_read __P((DB_ENV *, void *, __db_noop_args **)); +int __db_init_print __P((DB_ENV *)); +int __db_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/db_cxx.h b/bdb/include/db_cxx.h new file mode 100644 index 00000000000..b5599ee699c --- /dev/null +++ b/bdb/include/db_cxx.h @@ -0,0 +1,652 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_cxx.h,v 11.44 2000/12/21 20:30:18 dda Exp $ + */ + +#ifndef _DB_CXX_H_ +#define _DB_CXX_H_ +// +// C++ assumptions: +// +// To ensure portability to many platforms, both new and old, we make +// few assumptions about the C++ compiler and library. For example, +// we do not expect STL, templates or namespaces to be available. The +// "newest" C++ feature used is exceptions, which are used liberally +// to transmit error information. Even the use of exceptions can be +// disabled at runtime, to do so, use the DB_CXX_NO_EXCEPTIONS flags +// with the DbEnv or Db constructor. +// +// C++ naming conventions: +// +// - All top level class names start with Db. +// - All class members start with lower case letter. +// - All private data members are suffixed with underscore. +// - Use underscores to divide names into multiple words. +// - Simple data accessors are named with get_ or set_ prefix. +// - All method names are taken from names of functions in the C +// layer of db (usually by dropping a prefix like "db_"). +// These methods have the same argument types and order, +// other than dropping the explicit arg that acts as "this". +// +// As a rule, each DbFoo object has exactly one underlying DB_FOO struct +// (defined in db.h) associated with it. In some cases, we inherit directly +// from the DB_FOO structure to make this relationship explicit. Often, +// the underlying C layer allocates and deallocates these structures, so +// there is no easy way to add any data to the DbFoo class. When you see +// a comment about whether data is permitted to be added, this is what +// is going on. Of course, if we need to add data to such C++ classes +// in the future, we will arrange to have an indirect pointer to the +// DB_FOO struct (as some of the classes already have). +// + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Forward declarations +// + +#include <iostream.h> +#include <stdarg.h> +#include "db.h" + +class Db; // forward +class Dbc; // forward +class DbEnv; // forward +class DbException; // forward +class DbInfo; // forward +class DbLock; // forward +class DbLsn; // forward +class DbMpoolFile; // forward +class Dbt; // forward +class DbTxn; // forward + +// These classes are not defined here and should be invisible +// to the user, but some compilers require forward references. +// There is one for each use of the DEFINE_DB_CLASS macro. 
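//
// [Editorial aside, not part of the original header.]  Unless the
// DB_CXX_NO_EXCEPTIONS flag mentioned above is passed to the DbEnv or Db
// constructor, errors surface as DbException objects, so callers normally
// wrap their DB work in a try block.  A hedged sketch, using environment
// flags defined earlier in db.h; the home directory and names are ours:
//
//	try {
//		DbEnv env(0);
//		env.open("/var/dbhome", DB_CREATE | DB_INIT_MPOOL, 0);
//		// ... create and use Db handles here ...
//		env.close(0);
//	}
//	catch (DbException &e) {
//		cerr << "Berkeley DB error: " << e.what()
//		     << " (" << e.get_errno() << ")" << endl;
//	}
//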
+ +class DbImp; +class DbEnvImp; +class DbMpoolFileImp; +class DbTxnImp; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Mechanisms for declaring classes +// + +// +// Every class defined in this file has an _exported next to the class name. +// This is needed for WinTel machines so that the class methods can +// be exported or imported in a DLL as appropriate. Users of the DLL +// use the define DB_USE_DLL. When the DLL is built, DB_CREATE_DLL +// must be defined. +// +#if defined(_MSC_VER) + +# if defined(DB_CREATE_DLL) +# define _exported __declspec(dllexport) // creator of dll +# elif defined(DB_USE_DLL) +# define _exported __declspec(dllimport) // user of dll +# else +# define _exported // static lib creator or user +# endif + +#else + +# define _exported + +#endif + +// DEFINE_DB_CLASS defines an imp_ data member and imp() accessor. +// The underlying type is a pointer to an opaque *Imp class, that +// gets converted to the correct implementation class by the implementation. +// +// Since these defines use "private/public" labels, and leave the access +// being "private", we always use these by convention before any data +// members in the private section of a class. Keeping them in the +// private section also emphasizes that they are off limits to user code. +// +#define DEFINE_DB_CLASS(name) \ + public: class name##Imp* imp() { return (imp_); } \ + public: const class name##Imp* constimp() const { return (imp_); } \ + private: class name##Imp* imp_ + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Turn off inappropriate compiler warnings +// + +#ifdef _MSC_VER + +// These are level 4 warnings that are explicitly disabled. +// With Visual C++, by default you do not see above level 3 unless +// you use /W4. But we like to compile with the highest level +// warnings to catch other errors. +// +// 4201: nameless struct/union +// triggered by standard include file <winnt.h> +// +// 4514: unreferenced inline function has been removed +// certain include files in MSVC define methods that are not called +// +#pragma warning(disable: 4201 4514) + +#endif + +// Some interfaces can be customized by allowing users +// to define callback functions. For performance and +// logistical reasons, some callbacks require you do +// declare the functions in C, or in an extern "C" block. +// +extern "C" { + typedef void * (*db_malloc_fcn_type) + (size_t); + typedef void * (*db_realloc_fcn_type) + (void *, size_t); + typedef int (*bt_compare_fcn_type) + (DB *, const DBT *, const DBT *); + typedef size_t (*bt_prefix_fcn_type) + (DB *, const DBT *, const DBT *); + typedef int (*dup_compare_fcn_type) + (DB *, const DBT *, const DBT *); + typedef u_int32_t (*h_hash_fcn_type) + (DB *, const void *, u_int32_t); + typedef int (*pgin_fcn_type)(DB_ENV *dbenv, + db_pgno_t pgno, void *pgaddr, DBT *pgcookie); + typedef int (*pgout_fcn_type)(DB_ENV *dbenv, + db_pgno_t pgno, void *pgaddr, DBT *pgcookie); +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Exception classes +// + +// Almost any error in the DB library throws a DbException. +// Every exception should be considered an abnormality +// (e.g. bug, misuse of DB, file system error). 
+// +// NOTE: We would like to inherit from class exception and +// let it handle what(), but there are +// MSVC++ problems when <exception> is included. +// +class _exported DbException +{ +public: + virtual ~DbException(); + DbException(int err); + DbException(const char *description); + DbException(const char *prefix, int err); + DbException(const char *prefix1, const char *prefix2, int err); + int get_errno() const; + virtual const char *what() const; + + DbException(const DbException &); + DbException &operator = (const DbException &); + +private: + char *what_; + int err_; // errno +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Lock classes +// + +class _exported DbLock +{ + friend class DbEnv; + +public: + DbLock(); + + int put(DbEnv *env); + + DbLock(const DbLock &); + DbLock &operator = (const DbLock &); + +protected: + // We can add data to this class if needed + // since its contained class is not allocated by db. + // (see comment at top) + + DbLock(DB_LOCK); + DB_LOCK lock_; +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Log classes +// + +class _exported DbLsn : protected DB_LSN +{ + friend class DbEnv; // friendship needed to cast to base class +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Memory pool classes +// + +class _exported DbMpoolFile +{ + friend class DbEnv; + +public: + int close(); + int get(db_pgno_t *pgnoaddr, u_int32_t flags, void *pagep); + int put(void *pgaddr, u_int32_t flags); + int set(void *pgaddr, u_int32_t flags); + int sync(); + + static int open(DbEnv *envp, const char *file, + u_int32_t flags, int mode, size_t pagesize, + DB_MPOOL_FINFO *finfop, DbMpoolFile **mpf); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbMpoolFile::open() + // to get pointers to a DbMpoolFile, + // and call DbMpoolFile::close() rather than delete to release them. + // + DbMpoolFile(); + + // Shut g++ up. +protected: + ~DbMpoolFile(); + +private: + // no copying + DbMpoolFile(const DbMpoolFile &); + void operator = (const DbMpoolFile &); + + DEFINE_DB_CLASS(DbMpoolFile); +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Transaction classes +// + +class _exported DbTxn +{ + friend class DbEnv; + +public: + int abort(); + int commit(u_int32_t flags); + u_int32_t id(); + int prepare(); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbEnv::txn_begin() to get pointers to a DbTxn, + // and call DbTxn::abort() or DbTxn::commit rather than + // delete to release them. + // + DbTxn(); + ~DbTxn(); + + // no copying + DbTxn(const DbTxn &); + void operator = (const DbTxn &); + + DEFINE_DB_CLASS(DbTxn); +}; + +// +// Berkeley DB environment class. Provides functions for opening databases. +// User of this library can use this class as a starting point for +// developing a DB application - derive their application class from +// this one, add application control logic. +// +// Note that if you use the default constructor, you must explicitly +// call appinit() before any other db activity (e.g. 
opening files) +// +class _exported DbEnv +{ + friend class Db; + friend class DbLock; + friend class DbMpoolFile; + +public: + + ~DbEnv(); + + // After using this constructor, you can set any needed + // parameters for the environment using the set_* methods. + // Then call open() to finish initializing the environment + // and attaching it to underlying files. + // + DbEnv(u_int32_t flags); + + // These methods match those in the C interface. + // + int close(u_int32_t); + void err(int, const char *, ...); + void errx(const char *, ...); + int open(const char *, u_int32_t, int); + int remove(const char *, u_int32_t); + int set_cachesize(u_int32_t, u_int32_t, int); + int set_data_dir(const char *); + void set_errcall(void (*)(const char *, char *)); + void set_errfile(FILE *); + void set_errpfx(const char *); + int set_flags(u_int32_t, int); + int set_feedback(void (*)(DbEnv *, int, int)); + int set_recovery_init(int (*)(DbEnv *)); + int set_lg_bsize(u_int32_t); + int set_lg_dir(const char *); + int set_lg_max(u_int32_t); + int set_lk_conflicts(u_int8_t *, int); + int set_lk_detect(u_int32_t); + int set_lk_max(u_int32_t); + int set_lk_max_lockers(u_int32_t); + int set_lk_max_locks(u_int32_t); + int set_lk_max_objects(u_int32_t); + int set_mp_mmapsize(size_t); + int set_mutexlocks(int); + static int set_pageyield(int); + int set_paniccall(void (*)(DbEnv *, int)); + static int set_panicstate(int); + static int set_region_init(int); + int set_server(char *, long, long, u_int32_t); + int set_shm_key(long); + int set_tmp_dir(const char *); + static int set_tas_spins(u_int32_t); + int set_tx_max(u_int32_t); + int set_tx_recover(int (*)(DbEnv *, Dbt *, DbLsn *, db_recops)); + int set_tx_timestamp(time_t *); + int set_verbose(u_int32_t which, int onoff); + + // Version information. A static method so it can be obtained anytime. + // + static char *version(int *major, int *minor, int *patch); + + // Convert DB errors to strings + static char *strerror(int); + + // If an error is detected and the error call function + // or stream is set, a message is dispatched or printed. + // If a prefix is set, each message is prefixed. + // + // You can use set_errcall() or set_errfile() above to control + // error functionality. Alternatively, you can call + // set_error_stream() to force all errors to a C++ stream. + // It is unwise to mix these approaches. 
+ // + void set_error_stream(ostream *); + + // used internally + static void runtime_error(const char *caller, int err, + int error_policy); + + // Lock functions + // + int lock_detect(u_int32_t flags, u_int32_t atype, int *aborted); + int lock_get(u_int32_t locker, u_int32_t flags, const Dbt *obj, + db_lockmode_t lock_mode, DbLock *lock); + int lock_id(u_int32_t *idp); + int lock_stat(DB_LOCK_STAT **statp, db_malloc_fcn_type db_malloc_fcn); + int lock_vec(u_int32_t locker, u_int32_t flags, DB_LOCKREQ list[], + int nlist, DB_LOCKREQ **elistp); + + // Log functions + // + int log_archive(char **list[], u_int32_t flags, db_malloc_fcn_type db_malloc_fcn); + static int log_compare(const DbLsn *lsn0, const DbLsn *lsn1); + int log_file(DbLsn *lsn, char *namep, size_t len); + int log_flush(const DbLsn *lsn); + int log_get(DbLsn *lsn, Dbt *data, u_int32_t flags); + int log_put(DbLsn *lsn, const Dbt *data, u_int32_t flags); + + int log_register(Db *dbp, const char *name); + int log_stat(DB_LOG_STAT **spp, db_malloc_fcn_type db_malloc_fcn); + int log_unregister(Db *dbp); + + // Mpool functions + // + int memp_register(int ftype, + pgin_fcn_type pgin_fcn, + pgout_fcn_type pgout_fcn); + + int memp_stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp, + db_malloc_fcn_type db_malloc_fcn); + int memp_sync(DbLsn *lsn); + int memp_trickle(int pct, int *nwrotep); + + // Transaction functions + // + int txn_begin(DbTxn *pid, DbTxn **tid, u_int32_t flags); + int txn_checkpoint(u_int32_t kbyte, u_int32_t min, u_int32_t flags); + int txn_stat(DB_TXN_STAT **statp, db_malloc_fcn_type db_malloc_fcn); + + // These are public only because they need to be called + // via C functions. They should never be called by users + // of this class. + // + static void _stream_error_function(const char *, char *); + static int _tx_recover_intercept(DB_ENV *env, DBT *dbt, DB_LSN *lsn, + db_recops op); + static void _paniccall_intercept(DB_ENV *env, int errval); + static int _recovery_init_intercept(DB_ENV *env); + static void _feedback_intercept(DB_ENV *env, int opcode, int pct); + static void _destroy_check(const char *str, int isDbEnv); + +private: + void cleanup(); + int initialize(DB_ENV *env); + int error_policy(); + + // Used internally + DbEnv(DB_ENV *, u_int32_t flags); + + // no copying + DbEnv(const DbEnv &); + void operator = (const DbEnv &); + + DEFINE_DB_CLASS(DbEnv); + + // instance data + int construct_error_; + u_int32_t construct_flags_; + Db *headdb_; + Db *taildb_; + int (*tx_recover_callback_)(DbEnv *, Dbt *, DbLsn *, db_recops); + int (*recovery_init_callback_)(DbEnv *); + void (*paniccall_callback_)(DbEnv *, int); + void (*feedback_callback_)(DbEnv *, int, int); + + // class data + static ostream *error_stream_; +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Table access classes +// + +// +// Represents a database table = a set of keys with associated values. +// +class _exported Db +{ + friend class DbEnv; + +public: + Db(DbEnv*, u_int32_t); // create a Db object, then call open() + ~Db(); // does *not* call close. + + // These methods exactly match those in the C interface. 
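	//
	// [Editorial aside, not part of the original header.]  A hedged
	// sketch of the usual open/put/get sequence through this class;
	// the file name, key and data values are illustrative only:
	//
	//	Db db(&env, 0);
	//	db.open("inventory.db", NULL, DB_BTREE, DB_CREATE, 0664);
	//	Dbt key((void *)"fruit", 5), data((void *)"apple", 5);
	//	db.put(NULL, &key, &data, 0);
	//	Dbt result;
	//	if (db.get(NULL, &key, &result, 0) == 0)
	//		;	// result.get_data()/get_size() hold the value
	//	db.close(0);
	//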
+ // + int close(u_int32_t flags); + int cursor(DbTxn *txnid, Dbc **cursorp, u_int32_t flags); + int del(DbTxn *txnid, Dbt *key, u_int32_t flags); + void err(int, const char *, ...); + void errx(const char *, ...); + int fd(int *fdp); + int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags); + int get_byteswapped() const; + DBTYPE get_type() const; + int join(Dbc **curslist, Dbc **dbcp, u_int32_t flags); + int key_range(DbTxn *, Dbt *, DB_KEY_RANGE *, u_int32_t); + int open(const char *, const char *subname, DBTYPE, u_int32_t, int); + int put(DbTxn *, Dbt *, Dbt *, u_int32_t); + int remove(const char *, const char *, u_int32_t); + int rename(const char *, const char *, const char *, u_int32_t); + int set_bt_compare(bt_compare_fcn_type); + int set_bt_maxkey(u_int32_t); + int set_bt_minkey(u_int32_t); + int set_bt_prefix(bt_prefix_fcn_type); + int set_cachesize(u_int32_t, u_int32_t, int); + int set_dup_compare(dup_compare_fcn_type); + void set_errcall(void (*)(const char *, char *)); + void set_errfile(FILE *); + void set_errpfx(const char *); + int set_append_recno(int (*)(Db *, Dbt *, db_recno_t)); + int set_feedback(void (*)(Db *, int, int)); + int set_flags(u_int32_t); + int set_h_ffactor(u_int32_t); + int set_h_hash(h_hash_fcn_type); + int set_h_nelem(u_int32_t); + int set_lorder(int); + int set_malloc(db_malloc_fcn_type); + int set_pagesize(u_int32_t); + int set_paniccall(void (*)(DbEnv *, int)); + int set_realloc(db_realloc_fcn_type); + int set_re_delim(int); + int set_re_len(u_int32_t); + int set_re_pad(int); + int set_re_source(char *); + int set_q_extentsize(u_int32_t); + int stat(void *sp, db_malloc_fcn_type db_malloc_fcn, u_int32_t flags); + int sync(u_int32_t flags); + int upgrade(const char *name, u_int32_t flags); + int verify(const char *, const char *, ostream *, u_int32_t); + + // This additional method is available for C++ + // + void set_error_stream(ostream *); + + // These are public only because it needs to be called + // via C functions. It should never be called by users + // of this class. + // + static void _feedback_intercept(DB *db, int opcode, int pct); + static int _append_recno_intercept(DB *db, DBT *data, db_recno_t recno); +private: + + // no copying + Db(const Db &); + Db &operator = (const Db &); + + DEFINE_DB_CLASS(Db); + + void cleanup(); + int initialize(); + int error_policy(); + + // instance data + DbEnv *env_; + Db *next_; + Db *prev_; + int construct_error_; + u_int32_t flags_; + u_int32_t construct_flags_; + void (*feedback_callback_)(Db *, int, int); + int (*append_recno_callback_)(Db *, Dbt *, db_recno_t); +}; + +// +// A chunk of data, maybe a key or value. +// +class _exported Dbt : private DBT +{ + friend class Dbc; + friend class Db; + friend class DbEnv; + +public: + + // key/data + void *get_data() const; + void set_data(void *); + + // key/data length + u_int32_t get_size() const; + void set_size(u_int32_t); + + // RO: length of user buffer. + u_int32_t get_ulen() const; + void set_ulen(u_int32_t); + + // RO: get/put record length. + u_int32_t get_dlen() const; + void set_dlen(u_int32_t); + + // RO: get/put record offset. + u_int32_t get_doff() const; + void set_doff(u_int32_t); + + // flags + u_int32_t get_flags() const; + void set_flags(u_int32_t); + + Dbt(void *data, size_t size); + Dbt(); + ~Dbt(); + Dbt(const Dbt &); + Dbt &operator = (const Dbt &); + +private: + // We can add data to this class if needed + // since parent class is not allocated by db. 
+ // (see comment at top) +}; + +class _exported Dbc : protected DBC +{ + friend class Db; + +public: + int close(); + int count(db_recno_t *countp, u_int32_t flags); + int del(u_int32_t flags); + int dup(Dbc** cursorp, u_int32_t flags); + int get(Dbt* key, Dbt *data, u_int32_t flags); + int put(Dbt* key, Dbt *data, u_int32_t flags); + +private: + // No data is permitted in this class (see comment at top) + + // Note: use Db::cursor() to get pointers to a Dbc, + // and call Dbc::close() rather than delete to release them. + // + Dbc(); + ~Dbc(); + + // no copying + Dbc(const Dbc &); + Dbc &operator = (const Dbc &); +}; +#endif /* !_DB_CXX_H_ */ diff --git a/bdb/include/db_dispatch.h b/bdb/include/db_dispatch.h new file mode 100644 index 00000000000..003acee6f65 --- /dev/null +++ b/bdb/include/db_dispatch.h @@ -0,0 +1,95 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: db_dispatch.h,v 11.17 2000/12/14 07:39:13 ubell Exp $ + */ + +#ifndef _DB_DISPATCH_H_ +#define _DB_DISPATCH_H_ + +/* + * Declarations and typedefs for the list of transaction IDs used during + * recovery. This is a generic list used to pass along whatever information + * we need during recovery. 
+ */ +struct __db_txnhead { + LIST_HEAD(__db_headlink, __db_txnlist) head; + u_int32_t maxid; + int32_t generation; +}; + +#define TXNLIST_INVALID_ID 0xffffffff +struct __db_txnlist { + db_txnlist_type type; + LIST_ENTRY(__db_txnlist) links; + union { + struct { + u_int32_t txnid; + int32_t generation; + int32_t aborted; + } t; + struct { +#define TXNLIST_FLAG_DELETED 0x1 +#define TXNLIST_FLAG_CLOSED 0x2 + u_int32_t flags; + int32_t fileid; + u_int32_t count; + char *fname; + } d; + struct { + int32_t ntxns; + int32_t maxn; + DB_LSN *lsn_array; + } l; + struct { + int32_t nentries; + int32_t maxentry; + char *fname; + int32_t fileid; + db_pgno_t *pgno_array; + u_int8_t uid[DB_FILE_ID_LEN]; + } p; + } u; +}; + +/* + * Flag value for __db_txnlist_lsnadd. Distinguish whether we are replacing + * an entry in the transaction list or adding a new one. + */ + +#define TXNLIST_NEW 0x1 + +#define DB_user_BEGIN 10000 + +#endif diff --git a/bdb/include/db_ext.h b/bdb/include/db_ext.h new file mode 100644 index 00000000000..efe25424791 --- /dev/null +++ b/bdb/include/db_ext.h @@ -0,0 +1,208 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _db_ext_h_ +#define _db_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __crdel_fileopen_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_metasub_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_metapage_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_delete_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __crdel_rename_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_open __P((DB *, + const char *, const char *, DBTYPE, u_int32_t, int)); +int __db_dbopen __P((DB *, const char *, u_int32_t, int, db_pgno_t)); +int __db_master_open __P((DB *, + const char *, u_int32_t, int, DB **)); +int __db_dbenv_setup __P((DB *, const char *, u_int32_t)); +int __db_close __P((DB *, u_int32_t)); +int __db_remove __P((DB *, const char *, const char *, u_int32_t)); +int __db_rename __P((DB *, + const char *, const char *, const char *, u_int32_t)); +int __db_metabegin __P((DB *, DB_LOCK *)); +int __db_metaend __P((DB *, + DB_LOCK *, int, int (*)(DB *, void *), void *)); +int __db_log_page __P((DB *, + const char *, DB_LSN *, db_pgno_t, PAGE *)); +int __db_backup_name __P((DB_ENV *, + const char *, char **, DB_LSN *)); +DB *__dblist_get __P((DB_ENV *, u_int32_t)); +int __db_testcopy __P((DB *, const char *)); +int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t)); +int __db_icursor + __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DBC **)); +int __db_cprint __P((DB *)); +int __db_fd __P((DB *, int *)); +int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __db_sync __P((DB *, u_int32_t)); +int __db_c_close __P((DBC *)); +int __db_c_destroy __P((DBC *)); +int __db_c_count __P((DBC *, db_recno_t *, u_int32_t)); +int __db_c_del __P((DBC *, u_int32_t)); +int __db_c_dup __P((DBC *, DBC **, u_int32_t)); +int __db_c_newopd __P((DBC *, db_pgno_t, DBC **)); +int __db_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); +int __db_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); +int __db_duperr __P((DB *, u_int32_t)); +int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); +void __db_metaswap __P((PAGE *)); +int __db_byteswap __P((DB_ENV *, db_pgno_t, PAGE *, size_t, int)); +int __db_dispatch __P((DB_ENV *, DBT *, 
DB_LSN *, db_recops, void *)); +int __db_add_recovery __P((DB_ENV *, + int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t)); +int __deprecated_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_txnlist_init __P((DB_ENV *, void *)); +int __db_txnlist_add __P((DB_ENV *, void *, u_int32_t, int32_t)); +int __db_txnlist_remove __P((void *, u_int32_t)); +int __db_txnlist_close __P((void *, int32_t, u_int32_t)); +int __db_txnlist_delete __P((DB_ENV *, + void *, char *, u_int32_t, int)); +void __db_txnlist_end __P((DB_ENV *, void *)); +int __db_txnlist_find __P((void *, u_int32_t)); +void __db_txnlist_gen __P((void *, int)); +int __db_txnlist_lsnadd __P((DB_ENV *, void *, DB_LSN *, u_int32_t)); +int __db_txnlist_lsnhead __P((void *, DB_LSN **)); +int __db_txnlist_lsninit __P((DB_ENV *, DB_TXNHEAD *, DB_LSN *)); +int __db_add_limbo __P((DB_ENV *, + void *, int32_t, db_pgno_t, int32_t)); +int __db_do_the_limbo __P((DB_ENV *, DB_TXNHEAD *)); +int __db_txnlist_pgnoadd __P((DB_ENV *, DB_TXNHEAD *, + int32_t, u_int8_t [DB_FILE_ID_LEN], char *, db_pgno_t)); +void __db_txnlist_print __P((void *)); + int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); +int __db_pitem + __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); +int __db_relink __P((DBC *, u_int32_t, PAGE *, PAGE **, int)); +int __db_cursorchk __P((const DB *, u_int32_t, int)); +int __db_ccountchk __P((const DB *, u_int32_t, int)); +int __db_cdelchk __P((const DB *, u_int32_t, int, int)); +int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int)); +int __db_cputchk __P((const DB *, + const DBT *, DBT *, u_int32_t, int, int)); +int __db_closechk __P((const DB *, u_int32_t)); +int __db_delchk __P((const DB *, DBT *, u_int32_t, int)); +int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t)); +int __db_joinchk __P((const DB *, DBC * const *, u_int32_t)); +int __db_joingetchk __P((const DB *, DBT *, u_int32_t)); +int __db_putchk + __P((const DB *, DBT *, const DBT *, u_int32_t, int, int)); +int __db_removechk __P((const DB *, u_int32_t)); +int __db_statchk __P((const DB *, u_int32_t)); +int __db_syncchk __P((const DB *, u_int32_t)); +int __db_join __P((DB *, DBC **, DBC **, u_int32_t)); +int __db_new __P((DBC *, u_int32_t, PAGE **)); +int __db_free __P((DBC *, PAGE *)); +int __db_lprint __P((DBC *)); +int __db_lget __P((DBC *, + int, db_pgno_t, db_lockmode_t, int, DB_LOCK *)); +int __dbh_am_chk __P((DB *, u_int32_t)); +#ifdef HAVE_RPC +int __dbcl_init __P((DB *, DB_ENV *, u_int32_t)); +#endif +int __db_goff __P((DB *, DBT *, + u_int32_t, db_pgno_t, void **, u_int32_t *)); +int __db_poff __P((DBC *, const DBT *, db_pgno_t *)); +int __db_ovref __P((DBC *, db_pgno_t, int32_t)); +int __db_doff __P((DBC *, db_pgno_t)); +int __db_moff __P((DB *, const DBT *, db_pgno_t, u_int32_t, + int (*)(DB *, const DBT *, const DBT *), int *)); +int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + u_int32_t)); +int __db_vrfy_ovfl_structure + __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t)); +int __db_safe_goff __P((DB *, VRFY_DBINFO *, db_pgno_t, + DBT *, void **, u_int32_t)); +void __db_loadme __P((void)); +int __db_dump __P((DB *, char *, char *)); +int __db_prnpage __P((DB *, db_pgno_t)); +int __db_prpage __P((DB *, PAGE *, u_int32_t)); +void __db_pr __P((u_int8_t *, u_int32_t)); +int __db_prdbt __P((DBT *, int, const char *, void *, + int (*)(void *, const void *), int, VRFY_DBINFO *)); +void __db_prflags __P((u_int32_t, const FN *, FILE *)); +const char 
*__db_pagetype_to_string __P((u_int32_t)); +int __db_prheader __P((DB *, char *, int, int, void *, + int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t)); +int __db_prfooter __P((void *, int (*)(void *, const void *))); +int __db_addrem_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_big_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_ovref_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_relink_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __db_debug_recover __P((DB_ENV *, + DBT *, DB_LSN *, db_recops, void *)); +int __db_noop_recover __P((DB_ENV *, + DBT *, DB_LSN *, db_recops, void *)); +int __db_traverse_dup __P((DB *, + db_pgno_t, int (*)(DB *, PAGE *, void *, int *), void *)); +int __db_traverse_big __P((DB *, + db_pgno_t, int (*)(DB *, PAGE *, void *, int *), void *)); +int __db_reclaim_callback __P((DB *, PAGE *, void *, int *)); +int __db_ret __P((DB *, + PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); +int __db_retcopy __P((DB *, DBT *, + void *, u_int32_t, void **, u_int32_t *)); +int __db_upgrade __P((DB *, const char *, u_int32_t)); +int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *)); +int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *)); +int __db_verify + __P((DB *, const char *, const char *, FILE *, u_int32_t)); +int __db_verify_callback __P((void *, const void *)); +int __db_verify_internal __P((DB *, const char *, + const char *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_vrfy_datapage + __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __db_vrfy_meta + __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t)); +int __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *)); +int __db_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + void *, int (*)(void *, const void *), u_int32_t)); +int __db_vrfy_inpitem __P((DB *, PAGE *, + db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *)); +int __db_vrfy_duptype + __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); +int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t, + DBT *, void *, int (*)(void *, const void *), u_int32_t)); +int __db_salvage_subdbpg + __P((DB *, VRFY_DBINFO *, PAGE *, void *, + int (*)(void *, const void *), u_int32_t)); +int __db_vrfy_dbinfo_create + __P((DB_ENV *, u_int32_t, VRFY_DBINFO **)); +int __db_vrfy_dbinfo_destroy __P((VRFY_DBINFO *)); +int __db_vrfy_getpageinfo + __P((VRFY_DBINFO *, db_pgno_t, VRFY_PAGEINFO **)); +int __db_vrfy_putpageinfo __P((VRFY_DBINFO *, VRFY_PAGEINFO *)); +int __db_vrfy_pgset __P((DB_ENV *, u_int32_t, DB **)); +int __db_vrfy_pgset_get __P((DB *, db_pgno_t, int *)); +int __db_vrfy_pgset_inc __P((DB *, db_pgno_t)); +int __db_vrfy_pgset_dec __P((DB *, db_pgno_t)); +int __db_vrfy_pgset_next __P((DBC *, db_pgno_t *)); +int __db_vrfy_childcursor __P((VRFY_DBINFO *, DBC **)); +int __db_vrfy_childput + __P((VRFY_DBINFO *, db_pgno_t, VRFY_CHILDINFO *)); +int __db_vrfy_ccset __P((DBC *, db_pgno_t, VRFY_CHILDINFO **)); +int __db_vrfy_ccnext __P((DBC *, VRFY_CHILDINFO **)); +int __db_vrfy_ccclose __P((DBC *)); +int __db_vrfy_pageinfo_create __P((VRFY_PAGEINFO **)); +int __db_salvage_init __P((VRFY_DBINFO *)); +void __db_salvage_destroy __P((VRFY_DBINFO *)); +int __db_salvage_getnext + __P((VRFY_DBINFO *, db_pgno_t *, u_int32_t *)); +int __db_salvage_isdone __P((VRFY_DBINFO *, db_pgno_t)); +int __db_salvage_markdone __P((VRFY_DBINFO *, db_pgno_t)); +int __db_salvage_markneeded + __P((VRFY_DBINFO *, db_pgno_t, 
u_int32_t)); +#if defined(__cplusplus) +} +#endif +#endif /* _db_ext_h_ */ diff --git a/bdb/include/db_int.src b/bdb/include/db_int.src new file mode 100644 index 00000000000..347169ab5cd --- /dev/null +++ b/bdb/include/db_int.src @@ -0,0 +1,397 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_int.src,v 11.42 2001/01/11 17:49:17 krinsky Exp $ + */ + +#ifndef _DB_INTERNAL_H_ +#define _DB_INTERNAL_H_ + +/******************************************************* + * General includes. + *******************************************************/ +#include "db.h" + +#ifndef NO_SYSTEM_INCLUDES +#if defined(__STDC__) || defined(__cplusplus) +#include <stdarg.h> +#else +#include <varargs.h> +#endif +#include <errno.h> +#endif + +#include "queue.h" +#include "shqueue.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/******************************************************* + * General purpose constants and macros. + *******************************************************/ +#define UINT16_T_MAX 0xffff /* Maximum 16 bit unsigned. */ +#define UINT32_T_MAX 0xffffffff /* Maximum 32 bit unsigned. */ + +#define MEGABYTE 1048576 +#define GIGABYTE 1073741824 + +#define MS_PER_SEC 1000 /* Milliseconds in a second. */ +#define USEC_PER_MS 1000 /* Microseconds in a millisecond. */ + +#define DB_MIN_PGSIZE 0x000200 /* Minimum page size (512). */ +#define DB_MAX_PGSIZE 0x010000 /* Maximum page size (65536). */ + +#define RECNO_OOB 0 /* Illegal record number. */ + +/* + * If we are unable to determine the underlying filesystem block size, use + * 8K on the grounds that most OS's use less than 8K for a VM page size. + */ +#define DB_DEF_IOSIZE (8 * 1024) + +/* + * Aligning items to particular sizes or in pages or memory. + * + * db_align_t -- + * Largest integral type, used to align structures in memory. We don't store + * floating point types in structures, so integral types should be sufficient + * (and we don't have to worry about systems that store floats in other than + * power-of-2 numbers of bytes). Additionally this fixes compiler that rewrite + * structure assignments and ANSI C memcpy calls to be in-line instructions + * that happen to require alignment. Note: this alignment isn't sufficient for + * mutexes, which depend on things like cache line alignment. Mutex alignment + * is handled separately, in mutex.h. + * + * db_alignp_t -- + * Integral type that's the same size as a pointer. There are places where + * DB modifies pointers by discarding the bottom bits to guarantee alignment. + * We can't use db_align_t, it may be larger than the pointer, and compilers + * get upset about that. So far we haven't run on any machine where there + * isn't an integral type the same size as a pointer -- here's hoping. + */ +@db_align_t_decl@ +@db_alignp_t_decl@ + +/* Align an integer to a specific boundary. */ +#undef ALIGN +#define ALIGN(value, bound) \ + (((value) + (bound) - 1) & ~(((u_int)bound) - 1)) + +/* Align a pointer to a specific boundary. */ +#undef ALIGNP +#define ALIGNP(value, bound) ALIGN((db_alignp_t)value, bound) + +/* + * There are several on-page structures that are declared to have a number of + * fields followed by a variable length array of items. The structure size + * without including the variable length array or the address of the first of + * those elements can be found using SSZ. 
+ * + * This macro can also be used to find the offset of a structure element in a + * structure. This is used in various places to copy structure elements from + * unaligned memory references, e.g., pointers into a packed page. + * + * There are two versions because compilers object if you take the address of + * an array. + */ +#undef SSZ +#define SSZ(name, field) ((int)&(((name *)0)->field)) + +#undef SSZA +#define SSZA(name, field) ((int)&(((name *)0)->field[0])) + +/* + * Print an address as a u_long (a u_long is the largest type we can print + * portably). Most 64-bit systems have made longs 64-bits, so this should + * work. + */ +#define P_TO_ULONG(p) ((u_long)(db_alignp_t)(p)) + +/* Structure used to print flag values. */ +typedef struct __fn { + u_int32_t mask; /* Flag value. */ + const char *name; /* Flag name. */ +} FN; + +/* Set, clear and test flags. */ +#define FLD_CLR(fld, f) (fld) &= ~(f) +#define FLD_ISSET(fld, f) ((fld) & (f)) +#define FLD_SET(fld, f) (fld) |= (f) +#define F_CLR(p, f) (p)->flags &= ~(f) +#define F_ISSET(p, f) ((p)->flags & (f)) +#define F_SET(p, f) (p)->flags |= (f) +#define LF_CLR(f) (flags &= ~(f)) +#define LF_ISSET(f) (flags & (f)) +#define LF_SET(f) (flags |= (f)) + +/* Display separator string. */ +#undef DB_LINE +#define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" + +/* Unused, or not-used-yet variable. "Shut that bloody compiler up!" */ +#define COMPQUIET(n, v) (n) = (v) + +/******************************************************* + * Files. + *******************************************************/ + /* + * We use 1024 as the maximum path length. It's too hard to figure out what + * the real path length is, as it was traditionally stored in <sys/param.h>, + * and that file isn't always available. + */ +#undef MAXPATHLEN +#define MAXPATHLEN 1024 + +#define PATH_DOT "." /* Current working directory. */ +#define PATH_SEPARATOR "/" /* Path separator character. */ + +/* + * Flags understood by __os_open. + */ +#define DB_OSO_CREATE 0x001 /* POSIX: O_CREAT */ +#define DB_OSO_EXCL 0x002 /* POSIX: O_EXCL */ +#define DB_OSO_LOG 0x004 /* Opening a log file. */ +#define DB_OSO_RDONLY 0x008 /* POSIX: O_RDONLY */ +#define DB_OSO_REGION 0x010 /* Opening a region file. */ +#define DB_OSO_SEQ 0x020 /* Expected sequential access. */ +#define DB_OSO_TEMP 0x040 /* Remove after last close. */ +#define DB_OSO_TRUNC 0x080 /* POSIX: O_TRUNC */ + +/* + * Seek options understood by __os_seek. + */ +typedef enum { + DB_OS_SEEK_CUR, /* POSIX: SEEK_CUR */ + DB_OS_SEEK_END, /* POSIX: SEEK_END */ + DB_OS_SEEK_SET /* POSIX: SEEK_SET */ +} DB_OS_SEEK; + +/******************************************************* + * Environment. + *******************************************************/ +/* Type passed to __db_appname(). */ +typedef enum { + DB_APP_NONE=0, /* No type (region). */ + DB_APP_DATA, /* Data file. */ + DB_APP_LOG, /* Log file. */ + DB_APP_TMP /* Temporary file. */ +} APPNAME; + +/* + * CDB_LOCKING CDB product locking. + * LOCKING_ON Locking has been configured. + * LOGGING_ON Logging has been configured. + * MPOOL_ON Memory pool has been configured. + * TXN_ON Transactions have been configured. 
+ */ +#define CDB_LOCKING(dbenv) F_ISSET(dbenv, DB_ENV_CDB) +#define LOCKING_ON(dbenv) ((dbenv)->lk_handle != NULL) +#define LOGGING_ON(dbenv) ((dbenv)->lg_handle != NULL) +#define MPOOL_ON(dbenv) ((dbenv)->mp_handle != NULL) +#define TXN_ON(dbenv) ((dbenv)->tx_handle != NULL) + +/* + * STD_LOCKING Standard locking, that is, locking was configured and CDB + * was not. We do not do locking in off-page duplicate trees, + * so we check for that in the cursor first. + */ +#define STD_LOCKING(dbc) \ + (!F_ISSET(dbc, DBC_OPD) && \ + !CDB_LOCKING((dbc)->dbp->dbenv) && LOCKING_ON((dbc)->dbp->dbenv)) + +/* + * IS_RECOVERING The system is running recovery. + */ +#define IS_RECOVERING(dbenv) \ + (LOGGING_ON(dbenv) && \ + F_ISSET((DB_LOG *)(dbenv)->lg_handle, DBLOG_RECOVER)) + +/* Most initialization methods cannot be called after open is called. */ +#define ENV_ILLEGAL_AFTER_OPEN(dbenv, name) \ + if (F_ISSET((dbenv), DB_ENV_OPEN_CALLED)) \ + return (__db_mi_open(dbenv, name, 1)); + +/* We're not actually user hostile, honest. */ +#define ENV_REQUIRES_CONFIG(dbenv, handle, subsystem) \ + if (handle == NULL) \ + return (__db_env_config(dbenv, subsystem)); + +/******************************************************* + * Database Access Methods. + *******************************************************/ +/* + * DB_IS_THREADED -- + * The database handle is free-threaded (was opened with DB_THREAD). + */ +#define DB_IS_THREADED(dbp) \ + ((dbp)->mutexp != NULL) + +/* Initialization methods are often illegal before/after open is called. */ +#define DB_ILLEGAL_AFTER_OPEN(dbp, name) \ + if (F_ISSET((dbp), DB_OPEN_CALLED)) \ + return (__db_mi_open(dbp->dbenv, name, 1)); +#define DB_ILLEGAL_BEFORE_OPEN(dbp, name) \ + if (!F_ISSET((dbp), DB_OPEN_CALLED)) \ + return (__db_mi_open(dbp->dbenv, name, 0)); +/* Some initialization methods are illegal if environment isn't local. */ +#define DB_ILLEGAL_IN_ENV(dbp, name) \ + if (!F_ISSET(dbp->dbenv, DB_ENV_DBLOCAL)) \ + return (__db_mi_env(dbp->dbenv, name)); +#define DB_ILLEGAL_METHOD(dbp, flags) { \ + int __ret; \ + if ((__ret = __dbh_am_chk(dbp, flags)) != 0) \ + return (__ret); \ +} + +/* + * Common DBC->internal fields. Each access method adds additional fields + * to this list, but the initial fields are common. + */ +#define __DBC_INTERNAL \ + DBC *opd; /* Off-page duplicate cursor. */\ + \ + void *page; /* Referenced page. */ \ + db_pgno_t root; /* Tree root. */ \ + db_pgno_t pgno; /* Referenced page number. */ \ + db_indx_t indx; /* Referenced key item index. */\ + \ + DB_LOCK lock; /* Cursor lock. */ \ + db_lockmode_t lock_mode; /* Lock mode. */ + +struct __dbc_internal { + __DBC_INTERNAL +}; + +/* + * Access-method-common macro for determining whether a cursor + * has been initialized. + */ +#define IS_INITIALIZED(dbc) ((dbc)->internal->pgno != PGNO_INVALID) + +/******************************************************* + * Mpool. + *******************************************************/ +/* + * File types for DB access methods. Negative numbers are reserved to DB. + */ +#define DB_FTYPE_SET -1 /* Call pgin/pgout functions. */ +#define DB_FTYPE_NOTSET 0 /* Don't call... */ + +/* Structure used as the DB pgin/pgout pgcookie. */ +typedef struct __dbpginfo { + size_t db_pagesize; /* Underlying page size. */ + int needswap; /* If swapping required. */ +} DB_PGINFO; + +/******************************************************* + * Log. + *******************************************************/ +/* Initialize an LSN to 'zero'. 
*/ +#define ZERO_LSN(LSN) do { \ + (LSN).file = 0; \ + (LSN).offset = 0; \ +} while (0) + +/* Return 1 if LSN is a 'zero' lsn, otherwise return 0. */ +#define IS_ZERO_LSN(LSN) ((LSN).file == 0) + +/* Test if we need to log a change. */ +#define DB_LOGGING(dbc) \ + (LOGGING_ON((dbc)->dbp->dbenv) && !F_ISSET(dbc, DBC_RECOVER)) + +/* Internal flag for use with internal __log_unregister. */ +#define DB_LOGONLY 0x01 +/******************************************************* + * Txn. + *******************************************************/ +#define DB_NONBLOCK(C) ((C)->txn != NULL && F_ISSET((C)->txn, TXN_NOWAIT)) +#define IS_SUBTRANSACTION(txn) \ + ((txn) != NULL && (txn)->parent != NULL) + +/******************************************************* + * Global variables. + *******************************************************/ +#ifdef HAVE_VXWORKS +#include "semLib.h" +#endif + +/* + * DB global variables. Done in a single structure to minimize the name-space + * pollution. + */ +typedef struct __db_globals { + u_int32_t db_pageyield; /* db_set_pageyield */ + u_int32_t db_panic; /* db_set_panic */ + u_int32_t db_region_init; /* db_set_region_init */ + u_int32_t db_tas_spins; /* db_set_tas_spins */ +#ifdef HAVE_VXWORKS + u_int32_t db_global_init; /* VxWorks: inited */ + SEM_ID db_global_lock; /* VxWorks: global semaphore */ +#endif + /* XA: list of opened environments. */ + TAILQ_HEAD(__db_envq, __db_env) db_envq; +} DB_GLOBALS; + +#ifdef DB_INITIALIZE_DB_GLOBALS +DB_GLOBALS __db_global_values = { + 0, /* db_set_pageyield */ + 1, /* db_set_panic */ + 0, /* db_set_region_init */ + 0, /* db_set_tas_spins */ +#ifdef HAVE_VXWORKS + 0, /* db_global_init */ + NULL, /* db_global_lock */ +#endif + /* XA environment queue */ + {NULL, &__db_global_values.db_envq.tqh_first} +}; +#else +extern DB_GLOBALS __db_global_values; +#endif +#define DB_GLOBAL(v) __db_global_values.v + +/* Forward structure declarations. */ +struct __db_reginfo_t; typedef struct __db_reginfo_t REGINFO; +struct __mutex_t; typedef struct __mutex_t MUTEX; +struct __vrfy_childinfo; typedef struct __vrfy_childinfo VRFY_CHILDINFO; +struct __vrfy_dbinfo; typedef struct __vrfy_dbinfo VRFY_DBINFO; +struct __vrfy_pageinfo; typedef struct __vrfy_pageinfo VRFY_PAGEINFO; +struct __db_txnlist; typedef struct __db_txnlist DB_TXNLIST; +struct __db_txnhead; typedef struct __db_txnhead DB_TXNHEAD; +typedef enum { + TXNLIST_DELETE, + TXNLIST_LSN, + TXNLIST_TXNID, + TXNLIST_PGNO +} db_txnlist_type; + +/* + * Currently, region offsets are limited to 32-bits. I expect that's going + * to have to be fixed in the not-too-distant future, since we won't want to + * split 100Gb memory pools into that many different regions. It's typedef'd + * so it won't be too painful to upgrade. + */ +typedef u_int32_t roff_t; + +#if defined(__cplusplus) +} +#endif + +/******************************************************* + * More general includes. + *******************************************************/ +#include "debug.h" +#include "mutex.h" +#include "region.h" +#include "mutex_ext.h" +#include "env_ext.h" +#include "os.h" +#include "os_ext.h" +#include "common_ext.h" + +#endif /* !_DB_INTERNAL_H_ */ diff --git a/bdb/include/db_join.h b/bdb/include/db_join.h new file mode 100644 index 00000000000..d92887bb589 --- /dev/null +++ b/bdb/include/db_join.h @@ -0,0 +1,30 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. 
+ * + * @(#)db_join.h 11.1 (Sleepycat) 7/25/99 + */ + +#ifndef _DB_JOIN_H_ +#define _DB_JOIN_H_ + +/* + * Joins use a join cursor that is similar to a regular DB cursor except + * that it only supports c_get and c_close functionality. Also, it does + * not support the full range of flags for get. + */ +typedef struct __join_cursor { + u_int8_t *j_exhausted; /* Array of flags; is cursor i exhausted? */ + DBC **j_curslist; /* Array of cursors in the join: constant. */ + DBC **j_fdupcurs; /* Cursors w/ first intances of current dup. */ + DBC **j_workcurs; /* Scratch cursor copies to muck with. */ + DB *j_primary; /* Primary dbp. */ + DBT j_key; /* Used to do lookups. */ + u_int32_t j_ncurs; /* How many cursors do we have? */ +#define JOIN_RETRY 0x01 /* Error on primary get; re-return same key. */ + u_int32_t flags; +} JOIN_CURSOR; + +#endif diff --git a/bdb/include/db_page.h b/bdb/include/db_page.h new file mode 100644 index 00000000000..8066424143b --- /dev/null +++ b/bdb/include/db_page.h @@ -0,0 +1,576 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_page.h,v 11.28 2000/12/06 19:55:45 ubell Exp $ + */ + +#ifndef _DB_PAGE_H_ +#define _DB_PAGE_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * DB page formats. + * + * !!! + * This implementation requires that values within the following structures + * NOT be padded -- note, ANSI C permits random padding within structures. + * If your compiler pads randomly you can just forget ever making DB run on + * your system. In addition, no data type can require larger alignment than + * its own size, e.g., a 4-byte data element may not require 8-byte alignment. + * + * Note that key/data lengths are often stored in db_indx_t's -- this is + * not accidental, nor does it limit the key/data size. If the key/data + * item fits on a page, it's guaranteed to be small enough to fit into a + * db_indx_t, and storing it in one saves space. + */ + +#define PGNO_INVALID 0 /* Invalid page number in any database. */ +#define PGNO_BASE_MD 0 /* Base database: metadata page number. */ + +/* Page types. */ +#define P_INVALID 0 /* Invalid page type. */ +#define __P_DUPLICATE 1 /* Duplicate. DEPRECATED in 3.1 */ +#define P_HASH 2 /* Hash. */ +#define P_IBTREE 3 /* Btree internal. */ +#define P_IRECNO 4 /* Recno internal. */ +#define P_LBTREE 5 /* Btree leaf. */ +#define P_LRECNO 6 /* Recno leaf. */ +#define P_OVERFLOW 7 /* Overflow. */ +#define P_HASHMETA 8 /* Hash metadata page. */ +#define P_BTREEMETA 9 /* Btree metadata page. */ +#define P_QAMMETA 10 /* Queue metadata page. */ +#define P_QAMDATA 11 /* Queue data page. */ +#define P_LDUP 12 /* Off-page duplicate leaf. */ +#define P_PAGETYPE_MAX 13 + +/* + * When we create pages in mpool, we ask mpool to clear some number of bytes + * in the header. This number must be at least as big as the regular page + * headers and cover enough of the btree and hash meta-data pages to obliterate + * the page type. + */ +#define DB_PAGE_DB_LEN 32 +#define DB_PAGE_QUEUE_LEN 0 + +/************************************************************************ + GENERIC METADATA PAGE HEADER + * + * !!! + * The magic and version numbers have to be in the same place in all versions + * of the metadata page as the application may not have upgraded the database. + ************************************************************************/ +typedef struct _dbmeta31 { + DB_LSN lsn; /* 00-07: LSN. 
*/ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. */ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ + u_int32_t free; /* 28-31: Free list page number. */ + DB_LSN unused3; /* 32-39: former Lsn for allocation */ + u_int32_t key_count; /* 40-43: Cached key count. */ + u_int32_t record_count; /* 44-47: Cached record count. */ + u_int32_t flags; /* 48-51: Flags: unique to each AM. */ + /* 52-71: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} DBMETA31, DBMETA; + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _btmeta31 { +#define BTM_DUP 0x001 /* Duplicates. */ +#define BTM_RECNO 0x002 /* Recno tree. */ +#define BTM_RECNUM 0x004 /* Btree: maintain record count. */ +#define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */ +#define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */ +#define BTM_SUBDB 0x020 /* Subdatabases. */ +#define BTM_DUPSORT 0x040 /* Duplicates are sorted. */ +#define BTM_MASK 0x07f + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t maxkey; /* 72-75: Btree: Maxkey. */ + u_int32_t minkey; /* 76-79: Btree: Minkey. */ + u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */ + u_int32_t root; /* 88-92: Root page. */ + + /* + * Minimum page size is 128. + */ +} BTMETA31, BTMETA; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _hashmeta31 { +#define DB_HASH_DUP 0x01 /* Duplicates. */ +#define DB_HASH_SUBDB 0x02 /* Subdatabases. */ +#define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */ + DBMETA dbmeta; /* 00-71: Generic meta-data page header. */ + + u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 76-79: Modulo mask into table */ + u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */ + u_int32_t ffactor; /* 84-87: Fill factor */ + u_int32_t nelem; /* 88-91: Number of keys in hash table */ + u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */ +#define NCACHED 32 /* number of spare points */ + /* 96-223: Spare pages for overflow */ + u_int32_t spares[NCACHED]; + + /* + * Minimum page size is 256. + */ +} HMETA31, HMETA; + +/************************************************************************ + QUEUE METADATA PAGE LAYOUT + ************************************************************************/ +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta32 { + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t first_recno; /* 72-75: First not deleted record. */ + u_int32_t cur_recno; /* 76-79: Last recno allocated. */ + u_int32_t re_len; /* 80-83: Fixed-length record length. */ + u_int32_t re_pad; /* 84-87: Fixed-length record pad. */ + u_int32_t rec_page; /* 88-91: Records Per Page. */ + u_int32_t page_ext; /* 92-95: Pages per extent */ + + /* + * Minimum page size is 128. 
+ */ +} QMETA32, QMETA; + +/* + * DBMETASIZE is a constant used by __db_file_setup and DB->verify + * as a buffer which is guaranteed to be larger than any possible + * metadata page size and smaller than any disk sector. + */ +#define DBMETASIZE 256 + +/************************************************************************ + BTREE/HASH MAIN PAGE LAYOUT + ************************************************************************/ +/* + * +-----------------------------------+ + * | lsn | pgno | prev pgno | + * +-----------------------------------+ + * | next pgno | entries | hf offset | + * +-----------------------------------+ + * | level | type | index | + * +-----------------------------------+ + * | index | free --> | + * +-----------+-----------------------+ + * | F R E E A R E A | + * +-----------------------------------+ + * | <-- free | item | + * +-----------------------------------+ + * | item | item | item | + * +-----------------------------------+ + * + * sizeof(PAGE) == 26 bytes, and the following indices are guaranteed to be + * two-byte aligned. + * + * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the + * key for inp[1]'s data. All other types of pages only contain single items. + */ +typedef struct _db_page { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + db_pgno_t prev_pgno; /* 12-15: Previous page number. */ + db_pgno_t next_pgno; /* 16-19: Next page number. */ + db_indx_t entries; /* 20-21: Number of items on the page. */ + db_indx_t hf_offset; /* 22-23: High free byte page offset. */ + + /* + * The btree levels are numbered from the leaf to the root, starting + * with 1, so the leaf is level 1, its parent is level 2, and so on. + * We maintain this level on all btree pages, but the only place that + * we actually need it is on the root page. It would not be difficult + * to hide the byte on the root page once it becomes an internal page, + * so we could get this byte back if we needed it for something else. + */ +#define LEAFLEVEL 1 +#define MAXBTREELEVEL 255 + u_int8_t level; /* 24: Btree tree level. */ + u_int8_t type; /* 25: Page type. */ + db_indx_t inp[1]; /* Variable length index of items. */ +} PAGE; + +/* PAGE element macros. */ +#define LSN(p) (((PAGE *)p)->lsn) +#define PGNO(p) (((PAGE *)p)->pgno) +#define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) +#define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) +#define NUM_ENT(p) (((PAGE *)p)->entries) +#define HOFFSET(p) (((PAGE *)p)->hf_offset) +#define LEVEL(p) (((PAGE *)p)->level) +#define TYPE(p) (((PAGE *)p)->type) + +/************************************************************************ + QUEUE MAIN PAGE LAYOUT + ************************************************************************/ +typedef struct _qpage { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t unused0[3]; /* 12-23: Unused. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ +} QPAGE; + +/* + * !!! + * The next_pgno and prev_pgno fields are not maintained for btree and recno + * internal pages. Doing so only provides a minor performance improvement, + * it's hard to do when deleting internal pages, and it increases the chance + * of deadlock during deletes and splits because we have to re-link pages at + * more than the leaf level. + * + * !!! 
+ * The btree/recno access method needs db_recno_t bytes of space on the root + * page to specify how many records are stored in the tree. (The alternative + * is to store the number of records in the meta-data page, which will create + * a second hot spot in trees being actively modified, or recalculate it from + * the BINTERNAL fields on each access.) Overload the PREV_PGNO field. + */ +#define RE_NREC(p) \ + ((TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) ? \ + PREV_PGNO(p) : (TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : NUM_ENT(p))) +#define RE_NREC_ADJ(p, adj) \ + PREV_PGNO(p) += adj; +#define RE_NREC_SET(p, num) \ + PREV_PGNO(p) = num; + +/* + * Initialize a page. + * + * !!! + * Don't modify the page's LSN, code depends on it being unchanged after a + * P_INIT call. + */ +#define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ + PGNO(pg) = n; \ + PREV_PGNO(pg) = pg_prev; \ + NEXT_PGNO(pg) = pg_next; \ + NUM_ENT(pg) = 0; \ + HOFFSET(pg) = pg_size; \ + LEVEL(pg) = btl; \ + TYPE(pg) = pg_type; \ +} while (0) + +/* Page header length (offset to first index). */ +#define P_OVERHEAD (SSZA(PAGE, inp)) + +/* First free byte. */ +#define LOFFSET(pg) (P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t)) + +/* Free space on a regular page. */ +#define P_FREESPACE(pg) (HOFFSET(pg) - LOFFSET(pg)) + +/* Get a pointer to the bytes at a specific index. */ +#define P_ENTRY(pg, indx) ((u_int8_t *)pg + ((PAGE *)pg)->inp[indx]) + +/************************************************************************ + OVERFLOW PAGE LAYOUT + ************************************************************************/ + +/* + * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which + * store a page number (the first page of the overflow item) and a length + * (the total length of the overflow item). The overflow item consists of + * some number of overflow pages, linked by the next_pgno field of the page. + * A next_pgno field of PGNO_INVALID flags the end of the overflow item. + * + * Overflow page overloads: + * The amount of overflow data stored on each page is stored in the + * hf_offset field. + * + * The implementation reference counts overflow items as it's possible + * for them to be promoted onto btree internal pages. The reference + * count is stored in the entries field. + */ +#define OV_LEN(p) (((PAGE *)p)->hf_offset) +#define OV_REF(p) (((PAGE *)p)->entries) + +/* Maximum number of bytes that you can put on an overflow page. */ +#define P_MAXSPACE(psize) ((psize) - P_OVERHEAD) + +/* Free space on an overflow page. */ +#define P_OVFLSPACE(psize, pg) (P_MAXSPACE(psize) - HOFFSET(pg)) + +/************************************************************************ + HASH PAGE LAYOUT + ************************************************************************/ + +/* Each index references a group of bytes on the page. */ +#define H_KEYDATA 1 /* Key/data item. */ +#define H_DUPLICATE 2 /* Duplicate key/data item. */ +#define H_OFFPAGE 3 /* Overflow key/data item. */ +#define H_OFFDUP 4 /* Overflow page of duplicates. */ + +/* + * !!! + * Items on hash pages are (potentially) unaligned, so we can never cast the + * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as + * we do with B+tree on-page structures. Because we frequently want the type + * field, it requires no alignment, and it's in the same location in all three + * structures, there's a pair of macros. 
+ */ +#define HPAGE_PTYPE(p) (*(u_int8_t *)p) +#define HPAGE_TYPE(pg, indx) (*P_ENTRY(pg, indx)) + +/* + * The first and second types are H_KEYDATA and H_DUPLICATE, represented + * by the HKEYDATA structure: + * + * +-----------------------------------+ + * | type | key/data ... | + * +-----------------------------------+ + * + * For duplicates, the data field encodes duplicate elements in the data + * field: + * + * +---------------------------------------------------------------+ + * | type | len1 | element1 | len1 | len2 | element2 | len2 | + * +---------------------------------------------------------------+ + * + * Thus, by keeping track of the offset in the element, we can do both + * backward and forward traversal. + */ +typedef struct _hkeydata { + u_int8_t type; /* 00: Page type. */ + u_int8_t data[1]; /* Variable length key/data item. */ +} HKEYDATA; +#define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data)) + +/* + * The length of any HKEYDATA item. Note that indx is an element index, + * not a PAIR index. + */ +#define LEN_HITEM(pg, pgsize, indx) \ + (((indx) == 0 ? pgsize : \ + ((PAGE *)(pg))->inp[indx - 1]) - ((PAGE *)(pg))->inp[indx]) + +#define LEN_HKEYDATA(pg, psize, indx) \ + (LEN_HITEM(pg, psize, indx) - HKEYDATA_SIZE(0)) + +/* + * Page space required to add a new HKEYDATA item to the page, with and + * without the index value. + */ +#define HKEYDATA_SIZE(len) \ + ((len) + SSZA(HKEYDATA, data)) +#define HKEYDATA_PSIZE(len) \ + (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) + +/* Put a HKEYDATA item at the location referenced by a page entry. */ +#define PUT_HKEYDATA(pe, kd, len, type) { \ + ((HKEYDATA *)pe)->type = type; \ + memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len); \ +} + +/* + * Macros the describe the page layout in terms of key-data pairs. + */ +#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) +#define H_KEYINDEX(indx) (indx) +#define H_DATAINDEX(indx) ((indx) + 1) +#define H_PAIRKEY(pg, indx) P_ENTRY(pg, H_KEYINDEX(indx)) +#define H_PAIRDATA(pg, indx) P_ENTRY(pg, H_DATAINDEX(indx)) +#define H_PAIRSIZE(pg, psize, indx) \ + (LEN_HITEM(pg, psize, H_KEYINDEX(indx)) + \ + LEN_HITEM(pg, psize, H_DATAINDEX(indx))) +#define LEN_HDATA(p, psize, indx) LEN_HKEYDATA(p, psize, H_DATAINDEX(indx)) +#define LEN_HKEY(p, psize, indx) LEN_HKEYDATA(p, psize, H_KEYINDEX(indx)) + +/* + * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: + */ +typedef struct _hoffpage { + u_int8_t type; /* 00: Page type and delete flag. */ + u_int8_t unused[3]; /* 01-03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Offpage page number. */ + u_int32_t tlen; /* 08-11: Total length of item. */ +} HOFFPAGE; + +#define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno)) +#define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen)) + +/* + * Page space required to add a new HOFFPAGE item to the page, with and + * without the index value. + */ +#define HOFFPAGE_SIZE (sizeof(HOFFPAGE)) +#define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t)) + +/* + * The fourth type is H_OFFDUP represented by the HOFFDUP structure: + */ +typedef struct _hoffdup { + u_int8_t type; /* 00: Page type and delete flag. */ + u_int8_t unused[3]; /* 01-03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Offpage page number. */ +} HOFFDUP; +#define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno)) + +/* + * Page space required to add a new HOFFDUP item to the page, with and + * without the index value. 
+ */ +#define HOFFDUP_SIZE (sizeof(HOFFDUP)) + +/************************************************************************ + BTREE PAGE LAYOUT + ************************************************************************/ + +/* Each index references a group of bytes on the page. */ +#define B_KEYDATA 1 /* Key/data item. */ +#define B_DUPLICATE 2 /* Duplicate key/data item. */ +#define B_OVERFLOW 3 /* Overflow key/data item. */ + +/* + * We have to store a deleted entry flag in the page. The reason is complex, + * but the simple version is that we can't delete on-page items referenced by + * a cursor -- the return order of subsequent insertions might be wrong. The + * delete flag is an overload of the top bit of the type byte. + */ +#define B_DELETE (0x80) +#define B_DCLR(t) (t) &= ~B_DELETE +#define B_DSET(t) (t) |= B_DELETE +#define B_DISSET(t) ((t) & B_DELETE) + +#define B_TYPE(t) ((t) & ~B_DELETE) +#define B_TSET(t, type, deleted) { \ + (t) = (type); \ + if (deleted) \ + B_DSET(t); \ +} + +/* + * The first type is B_KEYDATA, represented by the BKEYDATA structure: + */ +typedef struct _bkeydata { + db_indx_t len; /* 00-01: Key/data item length. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t data[1]; /* Variable length key/data item. */ +} BKEYDATA; + +/* Get a BKEYDATA item for a specific index. */ +#define GET_BKEYDATA(pg, indx) \ + ((BKEYDATA *)P_ENTRY(pg, indx)) + +/* + * Page space required to add a new BKEYDATA item to the page, with and + * without the index value. + */ +#define BKEYDATA_SIZE(len) \ + ALIGN((len) + SSZA(BKEYDATA, data), sizeof(u_int32_t)) +#define BKEYDATA_PSIZE(len) \ + (BKEYDATA_SIZE(len) + sizeof(db_indx_t)) + +/* + * The second and third types are B_DUPLICATE and B_OVERFLOW, represented + * by the BOVERFLOW structure. + */ +typedef struct _boverflow { + db_indx_t unused1; /* 00-01: Padding, unused. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t unused2; /* 03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Next page number. */ + u_int32_t tlen; /* 08-11: Total length of item. */ +} BOVERFLOW; + +/* Get a BOVERFLOW item for a specific index. */ +#define GET_BOVERFLOW(pg, indx) \ + ((BOVERFLOW *)P_ENTRY(pg, indx)) + +/* + * Page space required to add a new BOVERFLOW item to the page, with and + * without the index value. + */ +#define BOVERFLOW_SIZE \ + ALIGN(sizeof(BOVERFLOW), sizeof(u_int32_t)) +#define BOVERFLOW_PSIZE \ + (BOVERFLOW_SIZE + sizeof(db_indx_t)) + +/* + * Btree leaf and hash page layouts group indices in sets of two, one for the + * key and one for the data. Everything else does it in sets of one to save + * space. Use the following macros so that it's real obvious what's going on. + */ +#define O_INDX 1 +#define P_INDX 2 + +/************************************************************************ + BTREE INTERNAL PAGE LAYOUT + ************************************************************************/ + +/* + * Btree internal entry. + */ +typedef struct _binternal { + db_indx_t len; /* 00-01: Key/data item length. */ + u_int8_t type; /* 02: Page type AND DELETE FLAG. */ + u_int8_t unused; /* 03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Page number of referenced page. */ + db_recno_t nrecs; /* 08-11: Subtree record count. */ + u_int8_t data[1]; /* Variable length key item. */ +} BINTERNAL; + +/* Get a BINTERNAL item for a specific index. 
*/ +#define GET_BINTERNAL(pg, indx) \ + ((BINTERNAL *)P_ENTRY(pg, indx)) + +/* + * Page space required to add a new BINTERNAL item to the page, with and + * without the index value. + */ +#define BINTERNAL_SIZE(len) \ + ALIGN((len) + SSZA(BINTERNAL, data), sizeof(u_int32_t)) +#define BINTERNAL_PSIZE(len) \ + (BINTERNAL_SIZE(len) + sizeof(db_indx_t)) + +/************************************************************************ + RECNO INTERNAL PAGE LAYOUT + ************************************************************************/ + +/* + * The recno internal entry. + */ +typedef struct _rinternal { + db_pgno_t pgno; /* 00-03: Page number of referenced page. */ + db_recno_t nrecs; /* 04-07: Subtree record count. */ +} RINTERNAL; + +/* Get a RINTERNAL item for a specific index. */ +#define GET_RINTERNAL(pg, indx) \ + ((RINTERNAL *)P_ENTRY(pg, indx)) + +/* + * Page space required to add a new RINTERNAL item to the page, with and + * without the index value. + */ +#define RINTERNAL_SIZE \ + ALIGN(sizeof(RINTERNAL), sizeof(u_int32_t)) +#define RINTERNAL_PSIZE \ + (RINTERNAL_SIZE + sizeof(db_indx_t)) + +#if defined(__cplusplus) +} +#endif + +#endif /* _DB_PAGE_H_ */ diff --git a/bdb/include/db_server.h b/bdb/include/db_server.h new file mode 100644 index 00000000000..e12fdb212d3 --- /dev/null +++ b/bdb/include/db_server.h @@ -0,0 +1,762 @@ +/* + * Please do not edit this file. + * It was generated using rpcgen. + */ + +#ifndef _DB_SERVER_H_RPCGEN +#define _DB_SERVER_H_RPCGEN + +#include <rpc/rpc.h> + +struct __env_cachesize_msg { + u_int dbenvcl_id; + u_int gbytes; + u_int bytes; + u_int ncache; +}; +typedef struct __env_cachesize_msg __env_cachesize_msg; + +struct __env_cachesize_reply { + u_int status; +}; +typedef struct __env_cachesize_reply __env_cachesize_reply; + +struct __env_close_msg { + u_int dbenvcl_id; + u_int flags; +}; +typedef struct __env_close_msg __env_close_msg; + +struct __env_close_reply { + u_int status; +}; +typedef struct __env_close_reply __env_close_reply; + +struct __env_create_msg { + u_int timeout; +}; +typedef struct __env_create_msg __env_create_msg; + +struct __env_create_reply { + u_int status; + u_int envcl_id; +}; +typedef struct __env_create_reply __env_create_reply; + +struct __env_flags_msg { + u_int dbenvcl_id; + u_int flags; + u_int onoff; +}; +typedef struct __env_flags_msg __env_flags_msg; + +struct __env_flags_reply { + u_int status; +}; +typedef struct __env_flags_reply __env_flags_reply; + +struct __env_open_msg { + u_int dbenvcl_id; + char *home; + u_int flags; + u_int mode; +}; +typedef struct __env_open_msg __env_open_msg; + +struct __env_open_reply { + u_int status; +}; +typedef struct __env_open_reply __env_open_reply; + +struct __env_remove_msg { + u_int dbenvcl_id; + char *home; + u_int flags; +}; +typedef struct __env_remove_msg __env_remove_msg; + +struct __env_remove_reply { + u_int status; +}; +typedef struct __env_remove_reply __env_remove_reply; + +struct __txn_abort_msg { + u_int txnpcl_id; +}; +typedef struct __txn_abort_msg __txn_abort_msg; + +struct __txn_abort_reply { + u_int status; +}; +typedef struct __txn_abort_reply __txn_abort_reply; + +struct __txn_begin_msg { + u_int envpcl_id; + u_int parentcl_id; + u_int flags; +}; +typedef struct __txn_begin_msg __txn_begin_msg; + +struct __txn_begin_reply { + u_int status; + u_int txnidcl_id; +}; +typedef struct __txn_begin_reply __txn_begin_reply; + +struct __txn_commit_msg { + u_int txnpcl_id; + u_int flags; +}; +typedef struct __txn_commit_msg __txn_commit_msg; + +struct 
__txn_commit_reply { + u_int status; +}; +typedef struct __txn_commit_reply __txn_commit_reply; + +struct __db_bt_maxkey_msg { + u_int dbpcl_id; + u_int maxkey; +}; +typedef struct __db_bt_maxkey_msg __db_bt_maxkey_msg; + +struct __db_bt_maxkey_reply { + u_int status; +}; +typedef struct __db_bt_maxkey_reply __db_bt_maxkey_reply; + +struct __db_bt_minkey_msg { + u_int dbpcl_id; + u_int minkey; +}; +typedef struct __db_bt_minkey_msg __db_bt_minkey_msg; + +struct __db_bt_minkey_reply { + u_int status; +}; +typedef struct __db_bt_minkey_reply __db_bt_minkey_reply; + +struct __db_close_msg { + u_int dbpcl_id; + u_int flags; +}; +typedef struct __db_close_msg __db_close_msg; + +struct __db_close_reply { + u_int status; +}; +typedef struct __db_close_reply __db_close_reply; + +struct __db_create_msg { + u_int flags; + u_int envpcl_id; +}; +typedef struct __db_create_msg __db_create_msg; + +struct __db_create_reply { + u_int status; + u_int dbpcl_id; +}; +typedef struct __db_create_reply __db_create_reply; + +struct __db_del_msg { + u_int dbpcl_id; + u_int txnpcl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int flags; +}; +typedef struct __db_del_msg __db_del_msg; + +struct __db_del_reply { + u_int status; +}; +typedef struct __db_del_reply __db_del_reply; + +struct __db_extentsize_msg { + u_int dbpcl_id; + u_int extentsize; +}; +typedef struct __db_extentsize_msg __db_extentsize_msg; + +struct __db_extentsize_reply { + u_int status; +}; +typedef struct __db_extentsize_reply __db_extentsize_reply; + +struct __db_flags_msg { + u_int dbpcl_id; + u_int flags; +}; +typedef struct __db_flags_msg __db_flags_msg; + +struct __db_flags_reply { + u_int status; +}; +typedef struct __db_flags_reply __db_flags_reply; + +struct __db_get_msg { + u_int dbpcl_id; + u_int txnpcl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int datadlen; + u_int datadoff; + u_int dataflags; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; + u_int flags; +}; +typedef struct __db_get_msg __db_get_msg; + +struct __db_get_reply { + u_int status; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; +}; +typedef struct __db_get_reply __db_get_reply; + +struct __db_h_ffactor_msg { + u_int dbpcl_id; + u_int ffactor; +}; +typedef struct __db_h_ffactor_msg __db_h_ffactor_msg; + +struct __db_h_ffactor_reply { + u_int status; +}; +typedef struct __db_h_ffactor_reply __db_h_ffactor_reply; + +struct __db_h_nelem_msg { + u_int dbpcl_id; + u_int nelem; +}; +typedef struct __db_h_nelem_msg __db_h_nelem_msg; + +struct __db_h_nelem_reply { + u_int status; +}; +typedef struct __db_h_nelem_reply __db_h_nelem_reply; + +struct __db_key_range_msg { + u_int dbpcl_id; + u_int txnpcl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int flags; +}; +typedef struct __db_key_range_msg __db_key_range_msg; + +struct __db_key_range_reply { + u_int status; + double less; + double equal; + double greater; +}; +typedef struct __db_key_range_reply __db_key_range_reply; + +struct __db_lorder_msg { + u_int dbpcl_id; + u_int lorder; +}; +typedef struct __db_lorder_msg __db_lorder_msg; + +struct __db_lorder_reply { + u_int status; +}; +typedef struct __db_lorder_reply __db_lorder_reply; + +struct __db_open_msg { + u_int dbpcl_id; + char 
*name; + char *subdb; + u_int type; + u_int flags; + u_int mode; +}; +typedef struct __db_open_msg __db_open_msg; + +struct __db_open_reply { + u_int status; + u_int type; + u_int dbflags; +}; +typedef struct __db_open_reply __db_open_reply; + +struct __db_pagesize_msg { + u_int dbpcl_id; + u_int pagesize; +}; +typedef struct __db_pagesize_msg __db_pagesize_msg; + +struct __db_pagesize_reply { + u_int status; +}; +typedef struct __db_pagesize_reply __db_pagesize_reply; + +struct __db_put_msg { + u_int dbpcl_id; + u_int txnpcl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int datadlen; + u_int datadoff; + u_int dataflags; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; + u_int flags; +}; +typedef struct __db_put_msg __db_put_msg; + +struct __db_put_reply { + u_int status; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; +}; +typedef struct __db_put_reply __db_put_reply; + +struct __db_re_delim_msg { + u_int dbpcl_id; + u_int delim; +}; +typedef struct __db_re_delim_msg __db_re_delim_msg; + +struct __db_re_delim_reply { + u_int status; +}; +typedef struct __db_re_delim_reply __db_re_delim_reply; + +struct __db_re_len_msg { + u_int dbpcl_id; + u_int len; +}; +typedef struct __db_re_len_msg __db_re_len_msg; + +struct __db_re_len_reply { + u_int status; +}; +typedef struct __db_re_len_reply __db_re_len_reply; + +struct __db_re_pad_msg { + u_int dbpcl_id; + u_int pad; +}; +typedef struct __db_re_pad_msg __db_re_pad_msg; + +struct __db_re_pad_reply { + u_int status; +}; +typedef struct __db_re_pad_reply __db_re_pad_reply; + +struct __db_remove_msg { + u_int dbpcl_id; + char *name; + char *subdb; + u_int flags; +}; +typedef struct __db_remove_msg __db_remove_msg; + +struct __db_remove_reply { + u_int status; +}; +typedef struct __db_remove_reply __db_remove_reply; + +struct __db_rename_msg { + u_int dbpcl_id; + char *name; + char *subdb; + char *newname; + u_int flags; +}; +typedef struct __db_rename_msg __db_rename_msg; + +struct __db_rename_reply { + u_int status; +}; +typedef struct __db_rename_reply __db_rename_reply; + +struct __db_stat_msg { + u_int dbpcl_id; + u_int flags; +}; +typedef struct __db_stat_msg __db_stat_msg; + +struct __db_stat_statsreplist { + struct { + u_int ent_len; + char *ent_val; + } ent; + struct __db_stat_statsreplist *next; +}; +typedef struct __db_stat_statsreplist __db_stat_statsreplist; + +struct __db_stat_reply { + u_int status; + __db_stat_statsreplist *statslist; +}; +typedef struct __db_stat_reply __db_stat_reply; + +struct __db_swapped_msg { + u_int dbpcl_id; +}; +typedef struct __db_swapped_msg __db_swapped_msg; + +struct __db_swapped_reply { + u_int status; +}; +typedef struct __db_swapped_reply __db_swapped_reply; + +struct __db_sync_msg { + u_int dbpcl_id; + u_int flags; +}; +typedef struct __db_sync_msg __db_sync_msg; + +struct __db_sync_reply { + u_int status; +}; +typedef struct __db_sync_reply __db_sync_reply; + +struct __db_cursor_msg { + u_int dbpcl_id; + u_int txnpcl_id; + u_int flags; +}; +typedef struct __db_cursor_msg __db_cursor_msg; + +struct __db_cursor_reply { + u_int status; + u_int dbcidcl_id; +}; +typedef struct __db_cursor_reply __db_cursor_reply; + +struct __db_join_curslist { + struct { + u_int ent_len; + char *ent_val; + } ent; + struct __db_join_curslist *next; +}; +typedef struct __db_join_curslist __db_join_curslist; + +struct __db_join_msg { + u_int dbpcl_id; + __db_join_curslist *curslist; + u_int flags; +}; 
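/*
 * Illustrative sketch (editorial note, not part of the rpcgen output):
 * how a hand-written client stub might flatten a key/data pair into the
 * __db_put_msg defined above before handing it to the RPC layer.  The
 * dbpcl_id and txnpcl_id values are the numeric handles the server
 * returned in earlier *_reply messages; the *dlen, *doff and *flags
 * fields mirror a DBT's partial-record fields and are simply left zero
 * here.  In real code this would live in a separate client source file
 * that includes this header and <string.h>.
 */
static void
example_fill_put_msg(__db_put_msg *msg, u_int dbp_id, u_int txn_id,
    void *key, u_int key_len, void *data, u_int data_len, u_int flags)
{
	memset(msg, 0, sizeof(*msg));		/* zeroes dlen/doff/flags fields */
	msg->dbpcl_id = dbp_id;			/* server-side DB handle */
	msg->txnpcl_id = txn_id;		/* server-side TXN handle, 0 if none */
	msg->keydata.keydata_len = key_len;
	msg->keydata.keydata_val = (char *)key;
	msg->datadata.datadata_len = data_len;
	msg->datadata.datadata_val = (char *)data;
	msg->flags = flags;			/* DB->put flag, e.g. DB_NOOVERWRITE */
}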
+typedef struct __db_join_msg __db_join_msg; + +struct __db_join_reply { + u_int status; + u_int dbcidcl_id; +}; +typedef struct __db_join_reply __db_join_reply; + +struct __dbc_close_msg { + u_int dbccl_id; +}; +typedef struct __dbc_close_msg __dbc_close_msg; + +struct __dbc_close_reply { + u_int status; +}; +typedef struct __dbc_close_reply __dbc_close_reply; + +struct __dbc_count_msg { + u_int dbccl_id; + u_int flags; +}; +typedef struct __dbc_count_msg __dbc_count_msg; + +struct __dbc_count_reply { + u_int status; + u_int dupcount; +}; +typedef struct __dbc_count_reply __dbc_count_reply; + +struct __dbc_del_msg { + u_int dbccl_id; + u_int flags; +}; +typedef struct __dbc_del_msg __dbc_del_msg; + +struct __dbc_del_reply { + u_int status; +}; +typedef struct __dbc_del_reply __dbc_del_reply; + +struct __dbc_dup_msg { + u_int dbccl_id; + u_int flags; +}; +typedef struct __dbc_dup_msg __dbc_dup_msg; + +struct __dbc_dup_reply { + u_int status; + u_int dbcidcl_id; +}; +typedef struct __dbc_dup_reply __dbc_dup_reply; + +struct __dbc_get_msg { + u_int dbccl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int datadlen; + u_int datadoff; + u_int dataflags; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; + u_int flags; +}; +typedef struct __dbc_get_msg __dbc_get_msg; + +struct __dbc_get_reply { + u_int status; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; +}; +typedef struct __dbc_get_reply __dbc_get_reply; + +struct __dbc_put_msg { + u_int dbccl_id; + u_int keydlen; + u_int keydoff; + u_int keyflags; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; + u_int datadlen; + u_int datadoff; + u_int dataflags; + struct { + u_int datadata_len; + char *datadata_val; + } datadata; + u_int flags; +}; +typedef struct __dbc_put_msg __dbc_put_msg; + +struct __dbc_put_reply { + u_int status; + struct { + u_int keydata_len; + char *keydata_val; + } keydata; +}; +typedef struct __dbc_put_reply __dbc_put_reply; + +#define DB_SERVERPROG ((unsigned long)(351457)) +#define DB_SERVERVERS ((unsigned long)(1)) +#define __DB_env_cachesize ((unsigned long)(1)) +extern __env_cachesize_reply * __db_env_cachesize_1(); +#define __DB_env_close ((unsigned long)(2)) +extern __env_close_reply * __db_env_close_1(); +#define __DB_env_create ((unsigned long)(3)) +extern __env_create_reply * __db_env_create_1(); +#define __DB_env_flags ((unsigned long)(4)) +extern __env_flags_reply * __db_env_flags_1(); +#define __DB_env_open ((unsigned long)(5)) +extern __env_open_reply * __db_env_open_1(); +#define __DB_env_remove ((unsigned long)(6)) +extern __env_remove_reply * __db_env_remove_1(); +#define __DB_txn_abort ((unsigned long)(7)) +extern __txn_abort_reply * __db_txn_abort_1(); +#define __DB_txn_begin ((unsigned long)(8)) +extern __txn_begin_reply * __db_txn_begin_1(); +#define __DB_txn_commit ((unsigned long)(9)) +extern __txn_commit_reply * __db_txn_commit_1(); +#define __DB_db_bt_maxkey ((unsigned long)(10)) +extern __db_bt_maxkey_reply * __db_db_bt_maxkey_1(); +#define __DB_db_bt_minkey ((unsigned long)(11)) +extern __db_bt_minkey_reply * __db_db_bt_minkey_1(); +#define __DB_db_close ((unsigned long)(12)) +extern __db_close_reply * __db_db_close_1(); +#define __DB_db_create ((unsigned long)(13)) +extern __db_create_reply * __db_db_create_1(); +#define __DB_db_del ((unsigned long)(14)) +extern __db_del_reply * __db_db_del_1(); 
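
(Aside, not part of the imported header: DB_SERVERPROG and DB_SERVERVERS above are the SunRPC program and version numbers a client binds to, and each __DB_* constant in this list is the procedure number handed to clnt_call() together with the matching xdr routines declared at the end of this header. A minimal sketch for the db_get procedure, using the __DB_db_get constant defined a few lines below and an arbitrarily chosen timeout:)

	#include <rpc/rpc.h>
	#include <string.h>

	/* Issue the db_get procedure over an already-created CLIENT handle. */
	int
	call_db_get(CLIENT *cl, __db_get_msg *msg, __db_get_reply *reply)
	{
		struct timeval tv;

		tv.tv_sec = 25;			/* arbitrary timeout */
		tv.tv_usec = 0;
		memset(reply, 0, sizeof(*reply));
		if (clnt_call(cl, __DB_db_get,
		    (xdrproc_t)xdr___db_get_msg, (caddr_t)msg,
		    (xdrproc_t)xdr___db_get_reply, (caddr_t)reply, tv) != RPC_SUCCESS)
			return (-1);
		return ((int)reply->status);
	}
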
+#define __DB_db_extentsize ((unsigned long)(15)) +extern __db_extentsize_reply * __db_db_extentsize_1(); +#define __DB_db_flags ((unsigned long)(16)) +extern __db_flags_reply * __db_db_flags_1(); +#define __DB_db_get ((unsigned long)(17)) +extern __db_get_reply * __db_db_get_1(); +#define __DB_db_h_ffactor ((unsigned long)(18)) +extern __db_h_ffactor_reply * __db_db_h_ffactor_1(); +#define __DB_db_h_nelem ((unsigned long)(19)) +extern __db_h_nelem_reply * __db_db_h_nelem_1(); +#define __DB_db_key_range ((unsigned long)(20)) +extern __db_key_range_reply * __db_db_key_range_1(); +#define __DB_db_lorder ((unsigned long)(21)) +extern __db_lorder_reply * __db_db_lorder_1(); +#define __DB_db_open ((unsigned long)(22)) +extern __db_open_reply * __db_db_open_1(); +#define __DB_db_pagesize ((unsigned long)(23)) +extern __db_pagesize_reply * __db_db_pagesize_1(); +#define __DB_db_put ((unsigned long)(24)) +extern __db_put_reply * __db_db_put_1(); +#define __DB_db_re_delim ((unsigned long)(25)) +extern __db_re_delim_reply * __db_db_re_delim_1(); +#define __DB_db_re_len ((unsigned long)(26)) +extern __db_re_len_reply * __db_db_re_len_1(); +#define __DB_db_re_pad ((unsigned long)(27)) +extern __db_re_pad_reply * __db_db_re_pad_1(); +#define __DB_db_remove ((unsigned long)(28)) +extern __db_remove_reply * __db_db_remove_1(); +#define __DB_db_rename ((unsigned long)(29)) +extern __db_rename_reply * __db_db_rename_1(); +#define __DB_db_stat ((unsigned long)(30)) +extern __db_stat_reply * __db_db_stat_1(); +#define __DB_db_swapped ((unsigned long)(31)) +extern __db_swapped_reply * __db_db_swapped_1(); +#define __DB_db_sync ((unsigned long)(32)) +extern __db_sync_reply * __db_db_sync_1(); +#define __DB_db_cursor ((unsigned long)(33)) +extern __db_cursor_reply * __db_db_cursor_1(); +#define __DB_db_join ((unsigned long)(34)) +extern __db_join_reply * __db_db_join_1(); +#define __DB_dbc_close ((unsigned long)(35)) +extern __dbc_close_reply * __db_dbc_close_1(); +#define __DB_dbc_count ((unsigned long)(36)) +extern __dbc_count_reply * __db_dbc_count_1(); +#define __DB_dbc_del ((unsigned long)(37)) +extern __dbc_del_reply * __db_dbc_del_1(); +#define __DB_dbc_dup ((unsigned long)(38)) +extern __dbc_dup_reply * __db_dbc_dup_1(); +#define __DB_dbc_get ((unsigned long)(39)) +extern __dbc_get_reply * __db_dbc_get_1(); +#define __DB_dbc_put ((unsigned long)(40)) +extern __dbc_put_reply * __db_dbc_put_1(); +extern int db_serverprog_1_freeresult(); + +/* the xdr functions */ +extern bool_t xdr___env_cachesize_msg(); +extern bool_t xdr___env_cachesize_reply(); +extern bool_t xdr___env_close_msg(); +extern bool_t xdr___env_close_reply(); +extern bool_t xdr___env_create_msg(); +extern bool_t xdr___env_create_reply(); +extern bool_t xdr___env_flags_msg(); +extern bool_t xdr___env_flags_reply(); +extern bool_t xdr___env_open_msg(); +extern bool_t xdr___env_open_reply(); +extern bool_t xdr___env_remove_msg(); +extern bool_t xdr___env_remove_reply(); +extern bool_t xdr___txn_abort_msg(); +extern bool_t xdr___txn_abort_reply(); +extern bool_t xdr___txn_begin_msg(); +extern bool_t xdr___txn_begin_reply(); +extern bool_t xdr___txn_commit_msg(); +extern bool_t xdr___txn_commit_reply(); +extern bool_t xdr___db_bt_maxkey_msg(); +extern bool_t xdr___db_bt_maxkey_reply(); +extern bool_t xdr___db_bt_minkey_msg(); +extern bool_t xdr___db_bt_minkey_reply(); +extern bool_t xdr___db_close_msg(); +extern bool_t xdr___db_close_reply(); +extern bool_t xdr___db_create_msg(); +extern bool_t xdr___db_create_reply(); +extern bool_t 
xdr___db_del_msg(); +extern bool_t xdr___db_del_reply(); +extern bool_t xdr___db_extentsize_msg(); +extern bool_t xdr___db_extentsize_reply(); +extern bool_t xdr___db_flags_msg(); +extern bool_t xdr___db_flags_reply(); +extern bool_t xdr___db_get_msg(); +extern bool_t xdr___db_get_reply(); +extern bool_t xdr___db_h_ffactor_msg(); +extern bool_t xdr___db_h_ffactor_reply(); +extern bool_t xdr___db_h_nelem_msg(); +extern bool_t xdr___db_h_nelem_reply(); +extern bool_t xdr___db_key_range_msg(); +extern bool_t xdr___db_key_range_reply(); +extern bool_t xdr___db_lorder_msg(); +extern bool_t xdr___db_lorder_reply(); +extern bool_t xdr___db_open_msg(); +extern bool_t xdr___db_open_reply(); +extern bool_t xdr___db_pagesize_msg(); +extern bool_t xdr___db_pagesize_reply(); +extern bool_t xdr___db_put_msg(); +extern bool_t xdr___db_put_reply(); +extern bool_t xdr___db_re_delim_msg(); +extern bool_t xdr___db_re_delim_reply(); +extern bool_t xdr___db_re_len_msg(); +extern bool_t xdr___db_re_len_reply(); +extern bool_t xdr___db_re_pad_msg(); +extern bool_t xdr___db_re_pad_reply(); +extern bool_t xdr___db_remove_msg(); +extern bool_t xdr___db_remove_reply(); +extern bool_t xdr___db_rename_msg(); +extern bool_t xdr___db_rename_reply(); +extern bool_t xdr___db_stat_msg(); +extern bool_t xdr___db_stat_statsreplist(); +extern bool_t xdr___db_stat_reply(); +extern bool_t xdr___db_swapped_msg(); +extern bool_t xdr___db_swapped_reply(); +extern bool_t xdr___db_sync_msg(); +extern bool_t xdr___db_sync_reply(); +extern bool_t xdr___db_cursor_msg(); +extern bool_t xdr___db_cursor_reply(); +extern bool_t xdr___db_join_curslist(); +extern bool_t xdr___db_join_msg(); +extern bool_t xdr___db_join_reply(); +extern bool_t xdr___dbc_close_msg(); +extern bool_t xdr___dbc_close_reply(); +extern bool_t xdr___dbc_count_msg(); +extern bool_t xdr___dbc_count_reply(); +extern bool_t xdr___dbc_del_msg(); +extern bool_t xdr___dbc_del_reply(); +extern bool_t xdr___dbc_dup_msg(); +extern bool_t xdr___dbc_dup_reply(); +extern bool_t xdr___dbc_get_msg(); +extern bool_t xdr___dbc_get_reply(); +extern bool_t xdr___dbc_put_msg(); +extern bool_t xdr___dbc_put_reply(); + +#endif /* !_DB_SERVER_H_RPCGEN */ diff --git a/bdb/include/db_server_int.h b/bdb/include/db_server_int.h new file mode 100644 index 00000000000..69e88ea5aec --- /dev/null +++ b/bdb/include/db_server_int.h @@ -0,0 +1,85 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_server_int.h,v 1.13 2001/01/11 18:19:52 bostic Exp $ + */ + +#ifndef _DB_SERVER_INT_H_ +#define _DB_SERVER_INT_H_ + +#define DB_SERVER_TIMEOUT 300 /* 5 minutes */ +#define DB_SERVER_MAXTIMEOUT 1200 /* 20 minutes */ +#define DB_SERVER_IDLETIMEOUT 86400 /* 1 day */ + +#define CT_CURSOR 0x001 /* Cursor */ +#define CT_DB 0x002 /* Database */ +#define CT_ENV 0x004 /* Env */ +#define CT_TXN 0x008 /* Txn */ + +#define CT_JOIN 0x10000000 /* Join cursor component */ +#define CT_JOINCUR 0x20000000 /* Join cursor */ + +typedef struct home_entry home_entry; +struct home_entry { + LIST_ENTRY(home_entry) entries; + char *home; + char *dir; + char *name; +}; + +/* + * We maintain an activity timestamp for each handle. However, we + * set it to point, possibly to the ct_active field of its own handle + * or it may point to the ct_active field of a parent. 
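 * (Illustration, not part of the original comment: a top-level handle would
 * point at its own field, ctp->ct_activep = &ctp->ct_active, while a handle
 * created under it, e.g. a nested transaction, would inherit the parent's
 * pointer, child->ct_activep = parent->ct_activep, so that every descendant
 * ticks the single ct_active owned by the ultimate parent.)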
In the case + * of nested transactions and any cursors within transactions it must + * point to the ct_active field of the ultimate parent of the transaction + * no matter how deeply it is nested. + */ +typedef struct ct_entry ct_entry; +struct ct_entry { + LIST_ENTRY(ct_entry) entries; /* List of entries */ + union { + DB_ENV *envp; /* H_ENV */ + DB_TXN *txnp; /* H_TXN */ + DB *dbp; /* H_DB */ + DBC *dbc; /* H_CURSOR */ + void *anyp; + } handle_u; + long ct_id; /* Client ID */ + long *ct_activep; /* Activity timestamp pointer*/ + long *ct_origp; /* Original timestamp pointer*/ + long ct_active; /* Activity timestamp */ + long ct_timeout; /* Resource timeout */ + long ct_idle; /* Idle timeout */ + u_int32_t ct_type; /* This entry's type */ + struct ct_entry *ct_parent; /* Its parent */ + struct ct_entry *ct_envparent; /* Its environment */ +}; + +#define ct_envp handle_u.envp +#define ct_txnp handle_u.txnp +#define ct_dbp handle_u.dbp +#define ct_dbc handle_u.dbc +#define ct_anyp handle_u.anyp + +extern int __dbsrv_verbose; + +/* + * Get ctp and activate it. + * Assumes local variable 'replyp'. + * NOTE: May 'return' from macro. + */ +#define ACTIVATE_CTP(ctp, id, type) { \ + (ctp) = get_tableent(id); \ + if ((ctp) == NULL) { \ + replyp->status = DB_NOSERVER_ID;\ + return; \ + } \ + DB_ASSERT((ctp)->ct_type & (type)); \ + __dbsrv_active(ctp); \ +} + +#endif /* _DB_SERVER_INT_H_ */ diff --git a/bdb/include/db_shash.h b/bdb/include/db_shash.h new file mode 100644 index 00000000000..0b9aac98f53 --- /dev/null +++ b/bdb/include/db_shash.h @@ -0,0 +1,77 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_shash.h,v 11.7 2000/12/12 17:43:56 bostic Exp $ + */ + +/* Hash Headers */ +typedef SH_TAILQ_HEAD(__hash_head) DB_HASHTAB; + +/* + * HASHLOOKUP -- + * + * Look up something in a shared memory hash table. The "elt" argument + * should be a key, and cmp_func must know how to compare a key to whatever + * structure it is that appears in the hash table. The comparison function + * + * begin: address of the beginning of the hash table. + * ndx: index into table for this item. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * elt: the item for which we are searching in the hash table. + * res: the variable into which we'll store the element if we find it. + * cmp: called as: cmp(lookup_elt, table_elt). + * + * If the element is not in the hash table, this macro exits with res set + * to NULL. + */ +#define HASHLOOKUP(begin, ndx, type, field, elt, res, cmp) do { \ + DB_HASHTAB *__bucket; \ + \ + __bucket = &begin[ndx]; \ + for (res = SH_TAILQ_FIRST(__bucket, type); \ + res != NULL; res = SH_TAILQ_NEXT(res, field, type)) \ + if (cmp(elt, res)) \ + break; \ +} while (0) + +/* + * HASHINSERT -- + * + * Insert a new entry into the hash table. This assumes that you already + * have the bucket locked and that lookup has failed; don't call it if you + * haven't already called HASHLOOKUP. If you do, you could get duplicate + * entries. + * + * begin: the beginning address of the hash table. + * ndx: the index for this element. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * elt: the item to be inserted. 
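 * (Usage sketch, not part of the original comment; the structure name
 * "__lockobj", its link field "links" and the routine obj_cmp() are made-up
 * examples:
 *
 *	HASHLOOKUP(htab, ndx, __lockobj, links, lookup_elt, found, obj_cmp);
 *	if (found == NULL)
 *		HASHINSERT(htab, ndx, __lockobj, links, new_elt);
 *
 * i.e. insert only after HASHLOOKUP has come back empty, as required above.)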
+ */ +#define HASHINSERT(begin, ndx, type, field, elt) do { \ + DB_HASHTAB *__bucket; \ + \ + __bucket = &begin[ndx]; \ + SH_TAILQ_INSERT_HEAD(__bucket, elt, field, type); \ +} while (0) + +/* + * HASHREMOVE_EL -- + * Given the object "obj" in the table, remove it. + * + * begin: address of the beginning of the hash table. + * ndx: index into hash table of where this element belongs. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * obj: the object in the table that we with to delete. + */ +#define HASHREMOVE_EL(begin, ndx, type, field, obj) { \ + DB_HASHTAB *__bucket; \ + \ + __bucket = &begin[ndx]; \ + SH_TAILQ_REMOVE(__bucket, obj, field, type); \ +} diff --git a/bdb/include/db_swap.h b/bdb/include/db_swap.h new file mode 100644 index 00000000000..bc96afb7a10 --- /dev/null +++ b/bdb/include/db_swap.h @@ -0,0 +1,115 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: db_swap.h,v 11.5 2000/03/28 16:14:36 bostic Exp $ + */ + +#ifndef _DB_SWAP_H_ +#define _DB_SWAP_H_ + +/* + * Little endian <==> big endian 32-bit swap macros. 
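 * (Worked example, not part of the original comment: the in-place swap simply
 * reverses the four bytes of a u_int32_t, so
 *
 *	u_int32_t v = 0x01020304;
 *	M_32_SWAP(v);		v is now 0x04030201
 *
 * and DB_HTONL/DB_NTOHL at the end of this file reduce to exactly this swap
 * on little-endian hosts and to nothing on big-endian ones.)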
+ * M_32_SWAP swap a memory location + * P_32_COPY copy potentially unaligned 4 byte quantities + * P_32_SWAP swap a referenced memory location + */ +#define M_32_SWAP(a) { \ + u_int32_t _tmp; \ + _tmp = a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[0]; \ +} +#define P_32_COPY(a, b) { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \ + ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \ +} +#define P_32_SWAP(a) { \ + u_int32_t _tmp; \ + P_32_COPY(a, &_tmp); \ + ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)a)[2] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)a)[3] = ((u_int8_t *)&_tmp)[0]; \ +} + +/* + * Little endian <==> big endian 16-bit swap macros. + * M_16_SWAP swap a memory location + * P_16_COPY copy potentially unaligned 2 byte quantities + * P_16_SWAP swap a referenced memory location + */ +#define M_16_SWAP(a) { \ + u_int16_t _tmp; \ + _tmp = (u_int16_t)a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[0]; \ +} +#define P_16_COPY(a, b) { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ +} +#define P_16_SWAP(a) { \ + u_int16_t _tmp; \ + P_16_COPY(a, &_tmp); \ + ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[0]; \ +} + +#define SWAP32(p) { \ + P_32_SWAP(p); \ + (p) += sizeof(u_int32_t); \ +} +#define SWAP16(p) { \ + P_16_SWAP(p); \ + (p) += sizeof(u_int16_t); \ +} + +/* + * DB has local versions of htonl() and ntohl() that only operate on pointers + * to the right size memory locations, the portability magic for finding the + * real ones isn't worth the effort. + */ +#if defined(WORDS_BIGENDIAN) +#define DB_HTONL(p) +#define DB_NTOHL(p) +#else +#define DB_HTONL(p) P_32_SWAP(p) +#define DB_NTOHL(p) P_32_SWAP(p) +#endif + +#endif /* !_DB_SWAP_H_ */ diff --git a/bdb/include/db_upgrade.h b/bdb/include/db_upgrade.h new file mode 100644 index 00000000000..d8d99645231 --- /dev/null +++ b/bdb/include/db_upgrade.h @@ -0,0 +1,174 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_upgrade.h,v 1.5 2000/11/16 23:40:56 ubell Exp $ + */ + +#ifndef _DB_UPGRADE_H_ +#define _DB_UPGRADE_H_ + +/* + * This file defines the metadata pages from the previous release. + * These structures are only used to upgrade old versions of databases. + */ + +/* Structures from the 3.1 release */ +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta31 { + DBMETA dbmeta; /* 00-71: Generic meta-data header. */ + + u_int32_t start; /* 72-75: Start offset. */ + u_int32_t first_recno; /* 76-79: First not deleted record. */ + u_int32_t cur_recno; /* 80-83: Last recno allocated. */ + u_int32_t re_len; /* 84-87: Fixed-length record length. */ + u_int32_t re_pad; /* 88-91: Fixed-length record pad. */ + u_int32_t rec_page; /* 92-95: Records Per Page. */ + + /* + * Minimum page size is 128. + */ +} QMETA31; + +/* Structures from the 3.0 release */ + +typedef struct _dbmeta30 { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. 
*/ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int8_t unused1[1]; /* 24: Unused. */ + u_int8_t type; /* 25: Page type. */ + u_int8_t unused2[2]; /* 26-27: Unused. */ + u_int32_t free; /* 28-31: Free list page number. */ + u_int32_t flags; /* 32-35: Flags: unique to each AM. */ + /* 36-55: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; +} DBMETA30; + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _btmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */ + + u_int32_t maxkey; /* 56-59: Btree: Maxkey. */ + u_int32_t minkey; /* 60-63: Btree: Minkey. */ + u_int32_t re_len; /* 64-67: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 68-71: Recno: fixed-length record pad. */ + u_int32_t root; /* 72-75: Root page. */ + + /* + * Minimum page size is 128. + */ +} BTMETA30; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ +typedef struct _hashmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data page header. */ + + u_int32_t max_bucket; /* 56-59: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 60-63: Modulo mask into table */ + u_int32_t low_mask; /* 64-67: Modulo mask into table lower half */ + u_int32_t ffactor; /* 68-71: Fill factor */ + u_int32_t nelem; /* 72-75: Number of keys in hash table */ + u_int32_t h_charkey; /* 76-79: Value of hash(CHARKEY) */ +#define NCACHED30 32 /* number of spare points */ + /* 80-207: Spare pages for overflow */ + u_int32_t spares[NCACHED30]; + + /* + * Minimum page size is 256. + */ +} HMETA30; + +/************************************************************************ + QUEUE METADATA PAGE LAYOUT + ************************************************************************/ +/* + * QAM Meta data page structure + * + */ +typedef struct _qmeta30 { + DBMETA30 dbmeta; /* 00-55: Generic meta-data header. */ + + u_int32_t start; /* 56-59: Start offset. */ + u_int32_t first_recno; /* 60-63: First not deleted record. */ + u_int32_t cur_recno; /* 64-67: Last recno allocated. */ + u_int32_t re_len; /* 68-71: Fixed-length record length. */ + u_int32_t re_pad; /* 72-75: Fixed-length record pad. */ + u_int32_t rec_page; /* 76-79: Records Per Page. */ + + /* + * Minimum page size is 128. + */ +} QMETA30; + +/* Structures from Release 2.x */ + +/************************************************************************ + BTREE METADATA PAGE LAYOUT + ************************************************************************/ + +/* + * Btree metadata page layout: + */ +typedef struct _btmeta2X { + DB_LSN lsn; /* 00-07: LSN. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + u_int32_t magic; /* 12-15: Magic number. */ + u_int32_t version; /* 16-19: Version. */ + u_int32_t pagesize; /* 20-23: Pagesize. */ + u_int32_t maxkey; /* 24-27: Btree: Maxkey. */ + u_int32_t minkey; /* 28-31: Btree: Minkey. */ + u_int32_t free; /* 32-35: Free list page number. */ + u_int32_t flags; /* 36-39: Flags. */ + u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */ + u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */ + /* 48-67: Unique file ID. 
*/ + u_int8_t uid[DB_FILE_ID_LEN]; +} BTMETA2X; + +/************************************************************************ + HASH METADATA PAGE LAYOUT + ************************************************************************/ + +/* + * Hash metadata page layout: + */ +/* Hash Table Information */ +typedef struct hashhdr { /* Disk resident portion */ + DB_LSN lsn; /* 00-07: LSN of the header page */ + db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */ + u_int32_t magic; /* 12-15: Magic NO for hash tables */ + u_int32_t version; /* 16-19: Version ID */ + u_int32_t pagesize; /* 20-23: Bucket/Page Size */ + u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */ + u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */ + u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */ + u_int32_t high_mask; /* 36-39: Modulo mask into table */ + u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */ + u_int32_t ffactor; /* 44-47: Fill factor */ + u_int32_t nelem; /* 48-51: Number of keys in hash table */ + u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */ + u_int32_t flags; /* 56-59: Allow duplicates. */ +#define NCACHED2X 32 /* number of spare points */ + /* 60-187: Spare pages for overflow */ + u_int32_t spares[NCACHED2X]; + /* 188-207: Unique file ID. */ + u_int8_t uid[DB_FILE_ID_LEN]; + + /* + * Minimum page size is 256. + */ +} HASHHDR; + +#endif diff --git a/bdb/include/db_verify.h b/bdb/include/db_verify.h new file mode 100644 index 00000000000..2507f1f1082 --- /dev/null +++ b/bdb/include/db_verify.h @@ -0,0 +1,191 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: db_verify.h,v 1.18 2000/12/31 17:51:52 bostic Exp $ + */ + +#ifndef _DB_VERIFY_H_ +#define _DB_VERIFY_H_ + +/* + * Structures and macros for the storage and retrieval of all information + * needed for inter-page verification of a database. + */ + +/* + * EPRINT is the macro for error printing. Takes as an arg the arg set + * for DB->err. + */ +#define EPRINT(x) \ + do { \ + if (!LF_ISSET(DB_SALVAGE)) \ + __db_err x; \ + } while (0) + +/* For fatal type errors--i.e., verifier bugs. */ +#define TYPE_ERR_PRINT(dbenv, func, pgno, ptype) \ + EPRINT(((dbenv), "%s called on nonsensical page %lu of type %lu", \ + (func), (u_long)(pgno), (u_long)(ptype))); + +/* Is x a power of two? (Tests true for zero, which doesn't matter here.) */ +#define POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0) + +#define IS_VALID_PAGESIZE(x) \ + (POWER_OF_TWO(x) && (x) >= DB_MIN_PGSIZE && ((x) <= DB_MAX_PGSIZE)) + +/* + * Note that 0 is, in general, a valid pgno, despite equalling PGNO_INVALID; + * we have to test it separately where it's not appropriate. + */ +#define IS_VALID_PGNO(x) ((x) <= vdp->last_pgno) + +/* + * Flags understood by the btree structure checks (esp. __bam_vrfy_subtree). + * These share the same space as the global flags to __db_verify, and must not + * dip below 0x00010000. + */ +#define ST_DUPOK 0x00010000 /* Duplicates are acceptable. */ +#define ST_DUPSET 0x00020000 /* Subtree is in a duplicate tree. */ +#define ST_DUPSORT 0x00040000 /* Duplicates are sorted. */ +#define ST_IS_RECNO 0x00080000 /* Subtree is a recno. */ +#define ST_OVFL_LEAF 0x00100000 /* Overflow reffed from leaf page. */ +#define ST_RECNUM 0x00200000 /* Subtree has record numbering on. */ +#define ST_RELEN 0x00400000 /* Subtree has fixed-length records. 
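 * (Concrete instance, not part of the original: with the customary
 * DB_MIN_PGSIZE of 512 and DB_MAX_PGSIZE of 64KB, IS_VALID_PAGESIZE above
 * accepts exactly 512, 1024, 2048, ..., 65536; 0 passes POWER_OF_TWO but is
 * rejected by the lower bound, as the note above anticipates.)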
*/ +#define ST_TOPLEVEL 0x00800000 /* Subtree == entire tree */ + +/* + * Flags understood by __bam_salvage and __db_salvage. These need not share + * the same space with the __bam_vrfy_subtree flags, but must share with + * __db_verify. + */ +#define SA_SKIPFIRSTKEY 0x00080000 + +/* + * VRFY_DBINFO is the fundamental structure; it either represents the database + * of subdatabases, or the sole database if there are no subdatabases. + */ +struct __vrfy_dbinfo { + /* Info about this database in particular. */ + DBTYPE type; + + /* List of subdatabase meta pages, if any. */ + LIST_HEAD(__subdbs, __vrfy_childinfo) subdbs; + + /* File-global info--stores VRFY_PAGEINFOs for each page. */ + DB *pgdbp; + + /* Child database--stores VRFY_CHILDINFOs of each page. */ + DB *cdbp; + + /* Page info structures currently in use. */ + LIST_HEAD(__activepips, __vrfy_pageinfo) activepips; + + /* + * DB we use to keep track of which pages are linked somehow + * during verification. 0 is the default, "unseen"; 1 is seen. + */ + DB *pgset; + + /* + * This is a database we use during salvaging to keep track of which + * overflow and dup pages we need to come back to at the end and print + * with key "UNKNOWN". Pages which print with a good key get set + * to SALVAGE_IGNORE; others get set, as appropriate, to SALVAGE_LDUP, + * SALVAGE_LRECNODUP, SALVAGE_OVERFLOW for normal db overflow pages, + * and SALVAGE_BTREE, SALVAGE_LRECNO, and SALVAGE_HASH for subdb + * pages. + */ +#define SALVAGE_INVALID 0 +#define SALVAGE_IGNORE 1 +#define SALVAGE_LDUP 2 +#define SALVAGE_LRECNODUP 3 +#define SALVAGE_OVERFLOW 4 +#define SALVAGE_LBTREE 5 +#define SALVAGE_HASH 6 +#define SALVAGE_LRECNO 7 + DB *salvage_pages; + + db_pgno_t last_pgno; + db_pgno_t pgs_remaining; /* For dbp->db_feedback(). */ + + /* Queue needs these to verify data pages in the first pass. */ + u_int32_t re_len; + u_int32_t rec_page; + +#define SALVAGE_PRINTHEADER 0x01 +#define SALVAGE_PRINTFOOTER 0x02 + u_int32_t flags; +}; /* VRFY_DBINFO */ + +/* + * The amount of state information we need per-page is small enough that + * it's not worth the trouble to define separate structures for each + * possible type of page, and since we're doing verification with these we + * have to be open to the possibility that page N will be of a completely + * unexpected type anyway. So we define one structure here with all the + * info we need for inter-page verification. + */ +struct __vrfy_pageinfo { + u_int8_t type; + u_int8_t bt_level; + u_int8_t unused1; + u_int8_t unused2; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + + /* meta pages */ + db_pgno_t root; + db_pgno_t free; /* Free list head. */ + + db_indx_t entries; /* Actual number of entries. */ + u_int16_t unused; + db_recno_t rec_cnt; /* Record count. */ + u_int32_t re_len; /* Record length. */ + u_int32_t bt_minkey; + u_int32_t bt_maxkey; + u_int32_t h_ffactor; + u_int32_t h_nelem; + + /* overflow pages */ + /* + * Note that refcount is the refcount for an overflow page; pi_refcount + * is this structure's own refcount! + */ + u_int32_t refcount; + u_int32_t olen; + +#define VRFY_DUPS_UNSORTED 0x0001 /* Have to flag the negative! */ +#define VRFY_HAS_DUPS 0x0002 +#define VRFY_HAS_DUPSORT 0x0004 /* Has the flag set. */ +#define VRFY_HAS_SUBDBS 0x0008 +#define VRFY_HAS_RECNUMS 0x0010 +#define VRFY_INCOMPLETE 0x0020 /* Meta or item order checks incomp. */ +#define VRFY_IS_ALLZEROES 0x0040 /* Hash page we haven't touched? 
*/ +#define VRFY_IS_FIXEDLEN 0x0080 +#define VRFY_IS_RECNO 0x0100 +#define VRFY_IS_RRECNO 0x0200 +#define VRFY_OVFL_LEAFSEEN 0x0400 + u_int32_t flags; + + LIST_ENTRY(__vrfy_pageinfo) links; + u_int32_t pi_refcount; +}; /* VRFY_PAGEINFO */ + +struct __vrfy_childinfo { + db_pgno_t pgno; + +#define V_DUPLICATE 1 /* off-page dup metadata */ +#define V_OVERFLOW 2 /* overflow page */ +#define V_RECNO 3 /* btree internal or leaf page */ + u_int32_t type; + db_recno_t nrecs; /* record count on a btree subtree */ + u_int32_t tlen; /* ovfl. item total size */ + + LIST_ENTRY(__vrfy_childinfo) links; +}; /* VRFY_CHILDINFO */ + +#endif /* _DB_VERIFY_H_ */ diff --git a/bdb/include/debug.h b/bdb/include/debug.h new file mode 100644 index 00000000000..9a3ffc1acb6 --- /dev/null +++ b/bdb/include/debug.h @@ -0,0 +1,104 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: debug.h,v 11.17 2000/07/07 15:50:36 bostic Exp $ + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * When running with #DIAGNOSTIC defined, we smash memory and do memory + * guarding with a special byte value. + */ +#define CLEAR_BYTE 0xdb +#define GUARD_BYTE 0xdc + +/* + * DB assertions. + */ +#if defined(DIAGNOSTIC) && defined(__STDC__) +#define DB_ASSERT(e) ((e) ? (void)0 : __db_assert(#e, __FILE__, __LINE__)) +#else +#define DB_ASSERT(e) ((void)0) +#endif + +/* + * Purify and other run-time tools complain about uninitialized reads/writes + * of structure fields whose only purpose is padding, as well as when heap + * memory that was never initialized is written to disk. + */ +#ifdef UMRW +#define UMRW_SET(v) (v) = 0 +#else +#define UMRW_SET(v) +#endif + +/* + * Debugging macro to log operations. + * If DEBUG_WOP is defined, log operations that modify the database. + * If DEBUG_ROP is defined, log operations that read the database. + * + * D dbp + * T txn + * O operation (string) + * K key + * A data + * F flags + */ +#define LOG_OP(C, T, O, K, A, F) { \ + DB_LSN __lsn; \ + DBT __op; \ + if (DB_LOGGING((C))) { \ + memset(&__op, 0, sizeof(__op)); \ + __op.data = O; \ + __op.size = strlen(O) + 1; \ + (void)__db_debug_log((C)->dbp->dbenv, \ + T, &__lsn, 0, &__op, (C)->dbp->log_fileid, K, A, F);\ + } \ +} +#ifdef DEBUG_ROP +#define DEBUG_LREAD(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) +#else +#define DEBUG_LREAD(C, T, O, K, A, F) +#endif +#ifdef DEBUG_WOP +#define DEBUG_LWRITE(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) +#else +#define DEBUG_LWRITE(C, T, O, K, A, F) +#endif + +/* + * Hook for testing recovery at various places in the create/delete paths. 
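 * (Usage sketch, not part of the original comment; DB_TEST_POSTLOG stands in
 * for whichever dbenv->test_copy / test_abort value the caller checks:
 *
 *	DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
 *	...
 *	DB_TEST_RECOVERY_LABEL
 *	return (ret);
 *
 * the macro below either copies the file, forces the abort path by jumping to
 * the label, or expands to nothing when CONFIG_TEST is off.)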
+ */ +#if CONFIG_TEST +#define DB_TEST_RECOVERY(dbp, val, ret, name) \ +do { \ + int __ret; \ + PANIC_CHECK((dbp)->dbenv); \ + if ((dbp)->dbenv->test_copy == (val)) { \ + /* COPY the FILE */ \ + if (F_ISSET((dbp), DB_OPEN_CALLED) && (dbp)->mpf != NULL) \ + (void)(dbp)->sync((dbp), 0); \ + if ((__ret = __db_testcopy((dbp), (name))) != 0) \ + (ret) = __db_panic((dbp)->dbenv, __ret); \ + } \ + if ((dbp)->dbenv->test_abort == (val)) { \ + /* ABORT the TXN */ \ + (ret) = EINVAL; \ + goto db_tr_err; \ + } \ +} while (0) +#define DB_TEST_RECOVERY_LABEL db_tr_err: +#else +#define DB_TEST_RECOVERY(dbp, val, ret, name) +#define DB_TEST_RECOVERY_LABEL +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/bdb/include/env_ext.h b/bdb/include/env_ext.h new file mode 100644 index 00000000000..0e7313fde9d --- /dev/null +++ b/bdb/include/env_ext.h @@ -0,0 +1,35 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _env_ext_h_ +#define _env_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +void __db_shalloc_init __P((void *, size_t)); +int __db_shalloc_size __P((size_t, size_t)); +int __db_shalloc __P((void *, size_t, size_t, void *)); +void __db_shalloc_free __P((void *, void *)); +size_t __db_shalloc_count __P((void *)); +size_t __db_shsizeof __P((void *)); +void __db_shalloc_dump __P((void *, FILE *)); +int __db_tablesize __P((u_int32_t)); +void __db_hashinit __P((void *, u_int32_t)); +int __dbenv_init __P((DB_ENV *)); +int __db_mi_env __P((DB_ENV *, const char *)); +int __db_mi_open __P((DB_ENV *, const char *, int)); +int __db_env_config __P((DB_ENV *, int)); +int __dbenv_open __P((DB_ENV *, const char *, u_int32_t, int)); +int __dbenv_remove __P((DB_ENV *, const char *, u_int32_t)); +int __dbenv_close __P((DB_ENV *, u_int32_t)); +int __db_appname __P((DB_ENV *, APPNAME, + const char *, const char *, u_int32_t, DB_FH *, char **)); +int __db_apprec __P((DB_ENV *, u_int32_t)); +int __db_e_attach __P((DB_ENV *, u_int32_t *)); +int __db_e_detach __P((DB_ENV *, int)); +int __db_e_remove __P((DB_ENV *, int)); +int __db_e_stat __P((DB_ENV *, REGENV *, REGION *, int *)); +int __db_r_attach __P((DB_ENV *, REGINFO *, size_t)); +int __db_r_detach __P((DB_ENV *, REGINFO *, int)); +#if defined(__cplusplus) +} +#endif +#endif /* _env_ext_h_ */ diff --git a/bdb/include/gen_client_ext.h b/bdb/include/gen_client_ext.h new file mode 100644 index 00000000000..5675b74d3ec --- /dev/null +++ b/bdb/include/gen_client_ext.h @@ -0,0 +1,121 @@ +/* Do not edit: automatically built by gen_rpc.awk. 
*/ +int __dbcl_env_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int)); +int __dbcl_env_close __P((DB_ENV *, u_int32_t)); +int __dbcl_env_close_ret __P((DB_ENV *, u_int32_t, __env_close_reply *)); +int __dbcl_rpc_illegal __P((DB_ENV *, char *)); +int __dbcl_set_data_dir __P((DB_ENV *, const char *)); +int __dbcl_env_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int))); +int __dbcl_env_flags __P((DB_ENV *, u_int32_t, int)); +int __dbcl_set_lg_bsize __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lg_dir __P((DB_ENV *, const char *)); +int __dbcl_set_lg_max __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lk_conflict __P((DB_ENV *, u_int8_t *, int)); +int __dbcl_set_lk_detect __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lk_max __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lk_max_locks __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lk_max_lockers __P((DB_ENV *, u_int32_t)); +int __dbcl_set_lk_max_objects __P((DB_ENV *, u_int32_t)); +int __dbcl_set_mp_mmapsize __P((DB_ENV *, size_t)); +int __dbcl_set_mutex_locks __P((DB_ENV *, int)); +int __dbcl_env_open __P((DB_ENV *, const char *, u_int32_t, int)); +int __dbcl_env_open_ret __P((DB_ENV *, const char *, u_int32_t, int, __env_open_reply *)); +int __dbcl_env_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); +int __dbcl_set_recovery_init __P((DB_ENV *, int (*)(DB_ENV *))); +int __dbcl_env_remove __P((DB_ENV *, const char *, u_int32_t)); +int __dbcl_env_remove_ret __P((DB_ENV *, const char *, u_int32_t, __env_remove_reply *)); +int __dbcl_set_shm_key __P((DB_ENV *, long)); +int __dbcl_set_tmp_dir __P((DB_ENV *, const char *)); +int __dbcl_set_tx_recover __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); +int __dbcl_set_tx_max __P((DB_ENV *, u_int32_t)); +int __dbcl_set_tx_timestamp __P((DB_ENV *, time_t *)); +int __dbcl_set_verbose __P((DB_ENV *, u_int32_t, int)); +int __dbcl_txn_abort __P((DB_TXN *)); +int __dbcl_txn_abort_ret __P((DB_TXN *, __txn_abort_reply *)); +int __dbcl_txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); +int __dbcl_txn_begin_ret __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t, __txn_begin_reply *)); +int __dbcl_txn_checkpoint __P((DB_ENV *, u_int32_t, u_int32_t)); +int __dbcl_txn_commit __P((DB_TXN *, u_int32_t)); +int __dbcl_txn_commit_ret __P((DB_TXN *, u_int32_t, __txn_commit_reply *)); +int __dbcl_txn_prepare __P((DB_TXN *)); +int __dbcl_txn_stat __P((DB_ENV *, DB_TXN_STAT **, void *(*)(size_t))); +int __dbcl_db_bt_compare __P((DB *, int (*)(DB *, const DBT *, const DBT *))); +int __dbcl_db_bt_maxkey __P((DB *, u_int32_t)); +int __dbcl_db_bt_minkey __P((DB *, u_int32_t)); +int __dbcl_db_bt_prefix __P((DB *, size_t(*)(DB *, const DBT *, const DBT *))); +int __dbcl_db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t))); +int __dbcl_db_cachesize __P((DB *, u_int32_t, u_int32_t, int)); +int __dbcl_db_close __P((DB *, u_int32_t)); +int __dbcl_db_close_ret __P((DB *, u_int32_t, __db_close_reply *)); +int __dbcl_db_del __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __dbcl_db_extentsize __P((DB *, u_int32_t)); +int __dbcl_db_fd __P((DB *, int *)); +int __dbcl_db_feedback __P((DB *, void (*)(DB *, int, int))); +int __dbcl_db_flags __P((DB *, u_int32_t)); +int __dbcl_db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __dbcl_db_get_ret __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t, __db_get_reply *)); +int __dbcl_db_h_ffactor __P((DB *, u_int32_t)); +int __dbcl_db_h_hash __P((DB *, u_int32_t(*)(DB *, const void *, u_int32_t))); +int __dbcl_db_h_nelem __P((DB *, u_int32_t)); +int __dbcl_db_key_range 
__P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __dbcl_db_key_range_ret __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t, __db_key_range_reply *)); +int __dbcl_db_lorder __P((DB *, int)); +int __dbcl_db_malloc __P((DB *, void *(*)(size_t))); +int __dbcl_db_open __P((DB *, const char *, const char *, DBTYPE, u_int32_t, int)); +int __dbcl_db_open_ret __P((DB *, const char *, const char *, DBTYPE, u_int32_t, int, __db_open_reply *)); +int __dbcl_db_pagesize __P((DB *, u_int32_t)); +int __dbcl_db_panic __P((DB *, void (*)(DB_ENV *, int))); +int __dbcl_db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __dbcl_db_put_ret __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t, __db_put_reply *)); +int __dbcl_db_realloc __P((DB *, void *(*)(void *, size_t))); +int __dbcl_db_re_delim __P((DB *, int)); +int __dbcl_db_re_len __P((DB *, u_int32_t)); +int __dbcl_db_re_pad __P((DB *, int)); +int __dbcl_db_re_source __P((DB *, const char *)); +int __dbcl_db_remove __P((DB *, const char *, const char *, u_int32_t)); +int __dbcl_db_remove_ret __P((DB *, const char *, const char *, u_int32_t, __db_remove_reply *)); +int __dbcl_db_rename __P((DB *, const char *, const char *, const char *, u_int32_t)); +int __dbcl_db_rename_ret __P((DB *, const char *, const char *, const char *, u_int32_t, __db_rename_reply *)); +int __dbcl_db_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); +int __dbcl_db_stat_ret __P((DB *, void *, void *(*)(size_t), u_int32_t, __db_stat_reply *)); +int __dbcl_db_swapped __P((DB *)); +int __dbcl_db_sync __P((DB *, u_int32_t)); +int __dbcl_db_upgrade __P((DB *, const char *, u_int32_t)); +int __dbcl_db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t)); +int __dbcl_db_cursor_ret __P((DB *, DB_TXN *, DBC **, u_int32_t, __db_cursor_reply *)); +int __dbcl_db_join __P((DB *, DBC **, DBC **, u_int32_t)); +int __dbcl_db_join_ret __P((DB *, DBC **, DBC **, u_int32_t, __db_join_reply *)); +int __dbcl_dbc_close __P((DBC *)); +int __dbcl_dbc_close_ret __P((DBC *, __dbc_close_reply *)); +int __dbcl_dbc_count __P((DBC *, db_recno_t *, u_int32_t)); +int __dbcl_dbc_count_ret __P((DBC *, db_recno_t *, u_int32_t, __dbc_count_reply *)); +int __dbcl_dbc_del __P((DBC *, u_int32_t)); +int __dbcl_dbc_dup __P((DBC *, DBC **, u_int32_t)); +int __dbcl_dbc_dup_ret __P((DBC *, DBC **, u_int32_t, __dbc_dup_reply *)); +int __dbcl_dbc_get __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbcl_dbc_get_ret __P((DBC *, DBT *, DBT *, u_int32_t, __dbc_get_reply *)); +int __dbcl_dbc_put __P((DBC *, DBT *, DBT *, u_int32_t)); +int __dbcl_dbc_put_ret __P((DBC *, DBT *, DBT *, u_int32_t, __dbc_put_reply *)); +int __dbcl_lock_detect __P((DB_ENV *, u_int32_t, u_int32_t, int *)); +int __dbcl_lock_get __P((DB_ENV *, u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); +int __dbcl_lock_id __P((DB_ENV *, u_int32_t *)); +int __dbcl_lock_put __P((DB_ENV *, DB_LOCK *)); +int __dbcl_lock_stat __P((DB_ENV *, DB_LOCK_STAT **, void *(*)(size_t))); +int __dbcl_lock_vec __P((DB_ENV *, u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); +int __dbcl_log_archive __P((DB_ENV *, char ***, u_int32_t, void *(*)(size_t))); +int __dbcl_log_file __P((DB_ENV *, const DB_LSN *, char *, size_t)); +int __dbcl_log_flush __P((DB_ENV *, const DB_LSN *)); +int __dbcl_log_get __P((DB_ENV *, DB_LSN *, DBT *, u_int32_t)); +int __dbcl_log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); +int __dbcl_log_register __P((DB_ENV *, DB *, const char *)); +int __dbcl_log_stat __P((DB_ENV *, DB_LOG_STAT **, void *(*)(size_t))); +int 
__dbcl_log_unregister __P((DB_ENV *, DB *)); +int __dbcl_memp_fclose __P((DB_MPOOLFILE *)); +int __dbcl_memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void **)); +int __dbcl_memp_fopen __P((DB_ENV *, const char *, u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); +int __dbcl_memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t)); +int __dbcl_memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t)); +int __dbcl_memp_fsync __P((DB_MPOOLFILE *)); +int __dbcl_memp_register __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t, void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *))); +int __dbcl_memp_stat __P((DB_ENV *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t))); +int __dbcl_memp_sync __P((DB_ENV *, DB_LSN *)); +int __dbcl_memp_trickle __P((DB_ENV *, int, int *)); diff --git a/bdb/include/gen_server_ext.h b/bdb/include/gen_server_ext.h new file mode 100644 index 00000000000..9037d908f17 --- /dev/null +++ b/bdb/include/gen_server_ext.h @@ -0,0 +1,106 @@ +/* Do not edit: automatically built by gen_rpc.awk. */ +__env_cachesize_reply * __db_env_cachesize_1 __P((__env_cachesize_msg *)); +void __env_cachesize_1_proc __P((long, u_int32_t, u_int32_t, + u_int32_t, __env_cachesize_reply *)); +__env_close_reply * __db_env_close_1 __P((__env_close_msg *)); +void __env_close_1_proc __P((long, u_int32_t, __env_close_reply *)); +__env_create_reply * __db_env_create_1 __P((__env_create_msg *)); +void __env_create_1_proc __P((u_int32_t, __env_create_reply *)); +__env_flags_reply * __db_env_flags_1 __P((__env_flags_msg *)); +void __env_flags_1_proc __P((long, u_int32_t, u_int32_t, __env_flags_reply *)); +__env_open_reply * __db_env_open_1 __P((__env_open_msg *)); +void __env_open_1_proc __P((long, char *, u_int32_t, + u_int32_t, __env_open_reply *)); +__env_remove_reply * __db_env_remove_1 __P((__env_remove_msg *)); +void __env_remove_1_proc __P((long, char *, u_int32_t, __env_remove_reply *)); +__txn_abort_reply * __db_txn_abort_1 __P((__txn_abort_msg *)); +void __txn_abort_1_proc __P((long, __txn_abort_reply *)); +__txn_begin_reply * __db_txn_begin_1 __P((__txn_begin_msg *)); +void __txn_begin_1_proc __P((long, long, + u_int32_t, __txn_begin_reply *)); +__txn_commit_reply * __db_txn_commit_1 __P((__txn_commit_msg *)); +void __txn_commit_1_proc __P((long, u_int32_t, __txn_commit_reply *)); +__db_bt_maxkey_reply * __db_db_bt_maxkey_1 __P((__db_bt_maxkey_msg *)); +void __db_bt_maxkey_1_proc __P((long, u_int32_t, __db_bt_maxkey_reply *)); +__db_bt_minkey_reply * __db_db_bt_minkey_1 __P((__db_bt_minkey_msg *)); +void __db_bt_minkey_1_proc __P((long, u_int32_t, __db_bt_minkey_reply *)); +__db_close_reply * __db_db_close_1 __P((__db_close_msg *)); +void __db_close_1_proc __P((long, u_int32_t, __db_close_reply *)); +__db_create_reply * __db_db_create_1 __P((__db_create_msg *)); +void __db_create_1_proc __P((u_int32_t, long, __db_create_reply *)); +__db_del_reply * __db_db_del_1 __P((__db_del_msg *)); +void __db_del_1_proc __P((long, long, u_int32_t, + u_int32_t, u_int32_t, void *, u_int32_t, + u_int32_t, __db_del_reply *)); +__db_extentsize_reply * __db_db_extentsize_1 __P((__db_extentsize_msg *)); +void __db_extentsize_1_proc __P((long, u_int32_t, __db_extentsize_reply *)); +__db_flags_reply * __db_db_flags_1 __P((__db_flags_msg *)); +void __db_flags_1_proc __P((long, u_int32_t, __db_flags_reply *)); +__db_get_reply * __db_db_get_1 __P((__db_get_msg *)); +void __db_get_1_proc __P((long, long, u_int32_t, + u_int32_t, u_int32_t, void *, u_int32_t, + u_int32_t, u_int32_t, u_int32_t, void *, + 
u_int32_t, u_int32_t, __db_get_reply *, int *)); +__db_h_ffactor_reply * __db_db_h_ffactor_1 __P((__db_h_ffactor_msg *)); +void __db_h_ffactor_1_proc __P((long, u_int32_t, __db_h_ffactor_reply *)); +__db_h_nelem_reply * __db_db_h_nelem_1 __P((__db_h_nelem_msg *)); +void __db_h_nelem_1_proc __P((long, u_int32_t, __db_h_nelem_reply *)); +__db_key_range_reply * __db_db_key_range_1 __P((__db_key_range_msg *)); +void __db_key_range_1_proc __P((long, long, u_int32_t, + u_int32_t, u_int32_t, void *, u_int32_t, + u_int32_t, __db_key_range_reply *)); +__db_lorder_reply * __db_db_lorder_1 __P((__db_lorder_msg *)); +void __db_lorder_1_proc __P((long, u_int32_t, __db_lorder_reply *)); +__db_open_reply * __db_db_open_1 __P((__db_open_msg *)); +void __db_open_1_proc __P((long, char *, char *, + u_int32_t, u_int32_t, u_int32_t, __db_open_reply *)); +__db_pagesize_reply * __db_db_pagesize_1 __P((__db_pagesize_msg *)); +void __db_pagesize_1_proc __P((long, u_int32_t, __db_pagesize_reply *)); +__db_put_reply * __db_db_put_1 __P((__db_put_msg *)); +void __db_put_1_proc __P((long, long, u_int32_t, + u_int32_t, u_int32_t, void *, u_int32_t, + u_int32_t, u_int32_t, u_int32_t, void *, + u_int32_t, u_int32_t, __db_put_reply *, int *)); +__db_re_delim_reply * __db_db_re_delim_1 __P((__db_re_delim_msg *)); +void __db_re_delim_1_proc __P((long, u_int32_t, __db_re_delim_reply *)); +__db_re_len_reply * __db_db_re_len_1 __P((__db_re_len_msg *)); +void __db_re_len_1_proc __P((long, u_int32_t, __db_re_len_reply *)); +__db_re_pad_reply * __db_db_re_pad_1 __P((__db_re_pad_msg *)); +void __db_re_pad_1_proc __P((long, u_int32_t, __db_re_pad_reply *)); +__db_remove_reply * __db_db_remove_1 __P((__db_remove_msg *)); +void __db_remove_1_proc __P((long, char *, char *, + u_int32_t, __db_remove_reply *)); +__db_rename_reply * __db_db_rename_1 __P((__db_rename_msg *)); +void __db_rename_1_proc __P((long, char *, char *, + char *, u_int32_t, __db_rename_reply *)); +__db_stat_reply * __db_db_stat_1 __P((__db_stat_msg *)); +void __db_stat_1_proc __P((long, + u_int32_t, __db_stat_reply *, int *)); +__db_swapped_reply * __db_db_swapped_1 __P((__db_swapped_msg *)); +void __db_swapped_1_proc __P((long, __db_swapped_reply *)); +__db_sync_reply * __db_db_sync_1 __P((__db_sync_msg *)); +void __db_sync_1_proc __P((long, u_int32_t, __db_sync_reply *)); +__db_cursor_reply * __db_db_cursor_1 __P((__db_cursor_msg *)); +void __db_cursor_1_proc __P((long, long, + u_int32_t, __db_cursor_reply *)); +__db_join_reply * __db_db_join_1 __P((__db_join_msg *)); +void __db_join_1_proc __P((long, u_int32_t *, + u_int32_t, __db_join_reply *)); +__dbc_close_reply * __db_dbc_close_1 __P((__dbc_close_msg *)); +void __dbc_close_1_proc __P((long, __dbc_close_reply *)); +__dbc_count_reply * __db_dbc_count_1 __P((__dbc_count_msg *)); +void __dbc_count_1_proc __P((long, u_int32_t, __dbc_count_reply *)); +__dbc_del_reply * __db_dbc_del_1 __P((__dbc_del_msg *)); +void __dbc_del_1_proc __P((long, u_int32_t, __dbc_del_reply *)); +__dbc_dup_reply * __db_dbc_dup_1 __P((__dbc_dup_msg *)); +void __dbc_dup_1_proc __P((long, u_int32_t, __dbc_dup_reply *)); +__dbc_get_reply * __db_dbc_get_1 __P((__dbc_get_msg *)); +void __dbc_get_1_proc __P((long, u_int32_t, u_int32_t, + u_int32_t, void *, u_int32_t, u_int32_t, + u_int32_t, u_int32_t, void *, u_int32_t, + u_int32_t, __dbc_get_reply *, int *)); +__dbc_put_reply * __db_dbc_put_1 __P((__dbc_put_msg *)); +void __dbc_put_1_proc __P((long, u_int32_t, u_int32_t, + u_int32_t, void *, u_int32_t, u_int32_t, + u_int32_t, u_int32_t, void 
*, u_int32_t, + u_int32_t, __dbc_put_reply *, int *)); diff --git a/bdb/include/hash.h b/bdb/include/hash.h new file mode 100644 index 00000000000..14a88c80b9c --- /dev/null +++ b/bdb/include/hash.h @@ -0,0 +1,140 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: hash.h,v 11.19 2000/12/21 23:05:16 krinsky Exp $ + */ + +/* Hash internal structure. */ +typedef struct hash_t { + db_pgno_t meta_pgno; /* Page number of the meta data page. */ + u_int32_t h_ffactor; /* Fill factor. */ + u_int32_t h_nelem; /* Number of elements. */ + /* Hash function. */ + u_int32_t (*h_hash) __P((DB *, const void *, u_int32_t)); +} HASH; + +/* Cursor structure definitions. */ +typedef struct cursor_t { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* Hash private part */ + + /* Per-thread information */ + DB_LOCK hlock; /* Metadata page lock. */ + HMETA *hdr; /* Pointer to meta-data page. */ + PAGE *split_buf; /* Temporary buffer for splits. */ + + /* Hash cursor information */ + db_pgno_t bucket; /* Bucket we are traversing. */ + db_pgno_t lbucket; /* Bucket for which we are locked. */ + db_indx_t dup_off; /* Offset within a duplicate set. */ + db_indx_t dup_len; /* Length of current duplicate. */ + db_indx_t dup_tlen; /* Total length of duplicate entry. */ + u_int32_t seek_size; /* Number of bytes we need for add. */ + db_pgno_t seek_found_page;/* Page on which we can insert. */ + u_int32_t order; /* Relative order among deleted curs. */ + +#define H_CONTINUE 0x0001 /* Join--search strictly fwd for data */ +#define H_DELETED 0x0002 /* Cursor item is deleted. 
*/ +#define H_DIRTY 0x0004 /* Meta-data page needs to be written */ +#define H_DUPONLY 0x0008 /* Dups only; do not change key. */ +#define H_EXPAND 0x0010 /* Table expanded. */ +#define H_ISDUP 0x0020 /* Cursor is within duplicate set. */ +#define H_NEXT_NODUP 0x0040 /* Get next non-dup entry. */ +#define H_NOMORE 0x0080 /* No more entries in bucket. */ +#define H_OK 0x0100 /* Request succeeded. */ + u_int32_t flags; +} HASH_CURSOR; + +/* Test string. */ +#define CHARKEY "%$sniglet^&" + +/* Overflow management */ +/* + * The spares table indicates the page number at which each doubling begins. + * From this page number we subtract the number of buckets already allocated + * so that we can do a simple addition to calculate the page number here. + */ +#define BS_TO_PAGE(bucket, spares) \ + ((bucket) + (spares)[__db_log2((bucket) + 1)]) +#define BUCKET_TO_PAGE(I, B) (BS_TO_PAGE((B), (I)->hdr->spares)) + +/* Constraints about much data goes on a page. */ + +#define MINFILL 4 +#define ISBIG(I, N) (((N) > ((I)->hdr->dbmeta.pagesize / MINFILL)) ? 1 : 0) + +/* Shorthands for accessing structure */ +#define NDX_INVALID 0xFFFF +#define BUCKET_INVALID 0xFFFFFFFF + +/* On page duplicates are stored as a string of size-data-size triples. */ +#define DUP_SIZE(len) ((len) + 2 * sizeof(db_indx_t)) + +/* Log messages types (these are subtypes within a record type) */ +#define PAIR_KEYMASK 0x1 +#define PAIR_DATAMASK 0x2 +#define PAIR_DUPMASK 0x4 +#define PAIR_MASK 0xf +#define PAIR_ISKEYBIG(N) (N & PAIR_KEYMASK) +#define PAIR_ISDATABIG(N) (N & PAIR_DATAMASK) +#define PAIR_ISDATADUP(N) (N & PAIR_DUPMASK) +#define OPCODE_OF(N) (N & ~PAIR_MASK) + +#define PUTPAIR 0x20 +#define DELPAIR 0x30 +#define PUTOVFL 0x40 +#define DELOVFL 0x50 +#define HASH_UNUSED1 0x60 +#define HASH_UNUSED2 0x70 +#define SPLITOLD 0x80 +#define SPLITNEW 0x90 + +typedef enum { + DB_HAM_CHGPG = 1, + DB_HAM_SPLIT = 2, + DB_HAM_DUP = 3 +} db_ham_mode; + +#include "hash_auto.h" +#include "hash_ext.h" +#include "db_am.h" diff --git a/bdb/include/hash_auto.h b/bdb/include/hash_auto.h new file mode 100644 index 00000000000..5d816d5bbd4 --- /dev/null +++ b/bdb/include/hash_auto.h @@ -0,0 +1,248 @@ +/* Do not edit: automatically built by gen_rec.awk. 
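 * (Aside, not generated output and not about this file: back in hash.h above,
 * DUP_SIZE(len) charges an on-page duplicate len + 2 * sizeof(db_indx_t)
 * bytes because, per the size-data-size layout, the length is stored on both
 * sides of the data, and ISBIG flags any item larger than a quarter of the
 * page size (MINFILL == 4), e.g. anything over 1024 bytes on 4096-byte pages.)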
*/ + +#ifndef ham_AUTO_H +#define ham_AUTO_H + +#define DB_ham_insdel 21 +typedef struct _ham_insdel_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + DBT key; + DBT data; +} __ham_insdel_args; + +int __ham_insdel_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_pgno_t, u_int32_t, DB_LSN *, const DBT *, const DBT *)); +int __ham_insdel_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_insdel_read __P((DB_ENV *, void *, __ham_insdel_args **)); + +#define DB_ham_newpage 22 +typedef struct _ham_newpage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN prevlsn; + db_pgno_t new_pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; +} __ham_newpage_args; + +int __ham_newpage_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); +int __ham_newpage_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpage_read __P((DB_ENV *, void *, __ham_newpage_args **)); + +#define DB_ham_splitmeta 23 +typedef struct _ham_splitmeta_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t bucket; + u_int32_t ovflpoint; + u_int32_t spares; + DB_LSN metalsn; +} __ham_splitmeta_args; + +int __ham_splitmeta_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitmeta_read __P((DB_ENV *, void *, __ham_splitmeta_args **)); + +#define DB_ham_splitdata 24 +typedef struct _ham_splitdata_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t opcode; + db_pgno_t pgno; + DBT pageimage; + DB_LSN pagelsn; +} __ham_splitdata_args; + +int __ham_splitdata_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, u_int32_t, db_pgno_t, const DBT *, DB_LSN *)); +int __ham_splitdata_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitdata_read __P((DB_ENV *, void *, __ham_splitdata_args **)); + +#define DB_ham_replace 25 +typedef struct _ham_replace_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + int32_t off; + DBT olditem; + DBT newitem; + u_int32_t makedup; +} __ham_replace_args; + +int __ham_replace_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, u_int32_t, DB_LSN *, int32_t, const DBT *, const DBT *, u_int32_t)); +int __ham_replace_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_read __P((DB_ENV *, void *, __ham_replace_args **)); + +#define DB_ham_newpgno 26 +typedef struct _ham_newpgno_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_pgno_t pgno; + db_pgno_t free_pgno; + u_int32_t old_type; + db_pgno_t old_pgno; + u_int32_t new_type; + DB_LSN pagelsn; + DB_LSN metalsn; +} __ham_newpgno_args; + +int __ham_newpgno_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpgno_read __P((DB_ENV *, void *, __ham_newpgno_args **)); + +#define DB_ham_ovfl 27 +typedef struct _ham_ovfl_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t start_pgno; + u_int32_t npages; + db_pgno_t free_pgno; + u_int32_t ovflpoint; + DB_LSN metalsn; +} __ham_ovfl_args; + +int __ham_ovfl_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_ovfl_read 
__P((DB_ENV *, void *, __ham_ovfl_args **)); + +#define DB_ham_copypage 28 +typedef struct _ham_copypage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; + db_pgno_t nnext_pgno; + DB_LSN nnextlsn; + DBT page; +} __ham_copypage_args; + +int __ham_copypage_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, const DBT *)); +int __ham_copypage_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_copypage_read __P((DB_ENV *, void *, __ham_copypage_args **)); + +#define DB_ham_metagroup 29 +typedef struct _ham_metagroup_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + u_int32_t bucket; + db_pgno_t pgno; + DB_LSN metalsn; + DB_LSN pagelsn; +} __ham_metagroup_args; + +int __ham_metagroup_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, u_int32_t, db_pgno_t, DB_LSN *, DB_LSN *)); +int __ham_metagroup_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_read __P((DB_ENV *, void *, __ham_metagroup_args **)); + +#define DB_ham_groupalloc1 30 +typedef struct _ham_groupalloc1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + DB_LSN metalsn; + DB_LSN mmetalsn; + db_pgno_t start_pgno; + u_int32_t num; +} __ham_groupalloc1_args; + +int __ham_groupalloc1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc1_read __P((DB_ENV *, void *, __ham_groupalloc1_args **)); + +#define DB_ham_groupalloc2 31 +typedef struct _ham_groupalloc2_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + DB_LSN alloc_lsn; + db_pgno_t start_pgno; + u_int32_t num; + db_pgno_t free; +} __ham_groupalloc2_args; + +int __ham_groupalloc2_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc2_read __P((DB_ENV *, void *, __ham_groupalloc2_args **)); + +#define DB_ham_groupalloc 32 +typedef struct _ham_groupalloc_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN meta_lsn; + db_pgno_t start_pgno; + u_int32_t num; + db_pgno_t free; +} __ham_groupalloc_args; + +int __ham_groupalloc_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_pgno_t)); +int __ham_groupalloc_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_read __P((DB_ENV *, void *, __ham_groupalloc_args **)); + +#define DB_ham_curadj 33 +typedef struct _ham_curadj_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + u_int32_t len; + u_int32_t dup_off; + int add; + int is_dup; + u_int32_t order; +} __ham_curadj_args; + +int __ham_curadj_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_pgno_t, u_int32_t, u_int32_t, u_int32_t, int, int, u_int32_t)); +int __ham_curadj_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_curadj_read __P((DB_ENV *, void *, __ham_curadj_args **)); + +#define DB_ham_chgpg 34 +typedef struct _ham_chgpg_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_ham_mode mode; + db_pgno_t old_pgno; + db_pgno_t new_pgno; + u_int32_t old_indx; + u_int32_t new_indx; +} __ham_chgpg_args; + +int __ham_chgpg_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_ham_mode, db_pgno_t, db_pgno_t, u_int32_t, u_int32_t)); +int 
__ham_chgpg_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_chgpg_read __P((DB_ENV *, void *, __ham_chgpg_args **)); +int __ham_init_print __P((DB_ENV *)); +int __ham_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/hash_ext.h b/bdb/include/hash_ext.h new file mode 100644 index 00000000000..babb77a7902 --- /dev/null +++ b/bdb/include/hash_ext.h @@ -0,0 +1,106 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _hash_ext_h_ +#define _hash_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __ham_metachk __P((DB *, const char *, HMETA *)); +int __ham_open __P((DB *, const char *, db_pgno_t, u_int32_t)); +int __ham_c_init __P((DBC *)); +int __ham_c_count __P((DBC *, db_recno_t *)); +int __ham_c_dup __P((DBC *, DBC *)); +u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, int32_t)); +int __ham_init_dbt __P((DB_ENV *, + DBT *, u_int32_t, void **, u_int32_t *)); +int __ham_c_update + __P((DBC *, u_int32_t, int, int)); +int __ham_get_clist __P((DB *, + db_pgno_t, u_int32_t, DBC ***)); +int __ham_c_chgpg + __P((DBC *, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); +int __ham_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __ham_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __ham_mswap __P((void *)); +int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); +int __ham_dup_convert __P((DBC *)); +int __ham_make_dup __P((DB_ENV *, + const DBT *, DBT *d, void **, u_int32_t *)); +void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); +void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *)); +int __ham_cprint __P((DB *)); +u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); +u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); +int __ham_get_meta __P((DBC *)); +int __ham_release_meta __P((DBC *)); +int __ham_dirty_meta __P((DBC *)); +int __ham_db_create __P((DB *)); +int __ham_db_close __P((DB *)); +int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_reset __P((DBC *)); +void __ham_item_init __P((DBC *)); +int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *)); +int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); +void __ham_putitem __P((PAGE *p, const DBT *, int)); +void __ham_reputpair + __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); +int __ham_del_pair __P((DBC *, int)); +int __ham_replpair __P((DBC *, DBT *, u_int32_t)); +void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, + int32_t, DBT *)); +int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); +int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); +void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *)); +int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); +int __ham_get_cpage __P((DBC *, db_lockmode_t)); +int __ham_next_cpage __P((DBC *, db_pgno_t, int)); +int __ham_lock_bucket __P((DBC *, db_lockmode_t)); +void __ham_dpair __P((DB *, PAGE *, u_int32_t)); +int __ham_insdel_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_newpage_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_replace_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_splitdata_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int 
__ham_copypage_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_metagroup_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_groupalloc_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_curadj_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_chgpg_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __ham_reclaim __P((DB *, DB_TXN *txn)); +int __ham_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); + int __ham_traverse __P((DB *, DBC *, db_lockmode_t, + int (*)(DB *, PAGE *, void *, int *), void *)); +int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); +int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); +int __ham_31_hashmeta + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_31_hash + __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, + db_pgno_t, u_int32_t)); +int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, + u_int32_t)); +int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, + u_int32_t)); +int __ham_vrfy_hashing __P((DB *, + u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, + u_int32_t (*) __P((DB *, const void *, u_int32_t)))); +int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + void *, int (*)(void *, const void *), u_int32_t)); +int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, + DB *)); +#if defined(__cplusplus) +} +#endif +#endif /* _hash_ext_h_ */ diff --git a/bdb/include/lock.h b/bdb/include/lock.h new file mode 100644 index 00000000000..e4a01ddf9c7 --- /dev/null +++ b/bdb/include/lock.h @@ -0,0 +1,190 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: lock.h,v 11.20 2000/12/12 17:43:56 bostic Exp $ + */ + +#define DB_LOCK_DEFAULT_N 1000 /* Default # of locks in region. */ + +/* + * Out of band value for a lock. Locks contain an offset into a lock region, + * so we use an invalid region offset to indicate an invalid or unset lock. + */ +#define LOCK_INVALID INVALID_ROFF + +/* + * The locker id space is divided between the transaction manager and the lock + * manager. Lock IDs start at 0 and go to DB_LOCK_MAXID. Txn IDs start at + * DB_LOCK_MAXID + 1 and go up to TXN_INVALID. + */ +#define DB_LOCK_MAXID 0x7fffffff + +/* + * DB_LOCKREGION -- + * The lock shared region. 
+ */ +typedef struct __db_lockregion { + u_int32_t id; /* unique id generator */ + u_int32_t need_dd; /* flag for deadlock detector */ + u_int32_t detect; /* run dd on every conflict */ + /* free lock header */ + SH_TAILQ_HEAD(__flock) free_locks; + /* free obj header */ + SH_TAILQ_HEAD(__fobj) free_objs; + /* free locker header */ + SH_TAILQ_HEAD(__flocker) free_lockers; + SH_TAILQ_HEAD(__dobj) dd_objs; /* objects with waiters */ + u_int32_t maxlocks; /* maximum number of locks in table */ + u_int32_t maxlockers; /* maximum number of lockers in table */ + u_int32_t maxobjects; /* maximum number of objects in table */ + u_int32_t locker_t_size; /* size of locker hash table */ + u_int32_t object_t_size; /* size of object hash table */ + u_int32_t nmodes; /* number of lock modes */ + u_int32_t nlocks; /* current number of locks */ + u_int32_t maxnlocks; /* maximum number of locks so far*/ + u_int32_t nlockers; /* current number of lockers */ + u_int32_t maxnlockers; /* maximum number of lockers so far */ + u_int32_t nobjects; /* current number of objects */ + u_int32_t maxnobjects; /* maximum number of objects so far */ + roff_t conf_off; /* offset of conflicts array */ + roff_t obj_off; /* offset of object hash table */ + roff_t osynch_off; /* offset of the object mutex table */ + roff_t locker_off; /* offset of locker hash table */ + roff_t lsynch_off; /* offset of the locker mutex table */ + u_int32_t nconflicts; /* number of lock conflicts */ + u_int32_t nrequests; /* number of lock gets */ + u_int32_t nreleases; /* number of lock puts */ + u_int32_t nnowaits; /* number of lock requests that would + have waited without nowait */ + u_int32_t ndeadlocks; /* number of deadlocks */ +#ifdef MUTEX_SYSTEM_RESOURCES + roff_t maint_off; /* offset of region maintenance info */ +#endif +} DB_LOCKREGION; + +/* + * Since we will store DBTs in shared memory, we need the equivalent of a + * DBT that will work in shared memory. + */ +typedef struct __sh_dbt { + u_int32_t size; /* Byte length. */ + ssize_t off; /* Region offset. */ +} SH_DBT; + +#define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off)) + +/* + * Object structures; these live in the object hash table. + */ +typedef struct __db_lockobj { + SH_DBT lockobj; /* Identifies object locked. */ + SH_TAILQ_ENTRY links; /* Links for free list or hash list. */ + SH_TAILQ_ENTRY dd_links; /* Links for dd list. */ + SH_TAILQ_HEAD(__wait) waiters; /* List of waiting locks. */ + SH_TAILQ_HEAD(__hold) holders; /* List of held locks. */ + /* Declare room in the object to hold + * typical DB lock structures so that + * we do not have to allocate them from + * shalloc at run-time. */ + u_int8_t objdata[sizeof(struct __db_ilock)]; +} DB_LOCKOBJ; + +/* + * Locker structures; these live in the locker hash table. + */ +typedef struct __db_locker { + u_int32_t id; /* Locker id. */ + u_int32_t dd_id; /* Deadlock detector id. */ + size_t master_locker; /* Locker of master transaction. */ + size_t parent_locker; /* Parent of this child. */ + SH_LIST_HEAD(_child) child_locker; /* List of descendant txns; + only used in a "master" + txn. */ + SH_LIST_ENTRY child_link; /* Links transactions in the family; + elements of the child_locker + list. */ + SH_TAILQ_ENTRY links; /* Links for free list. */ + SH_LIST_HEAD(_held) heldby; /* Locks held by this locker. */ + +#define DB_LOCKER_DELETED 0x0001 + u_int32_t flags; +} DB_LOCKER; + +/* + * Lockers can be freed if they are not part of a transaction family. 
+ * Members of a family either point at the master transaction or are + * the master transaction and have children lockers. + */ +#define LOCKER_FREEABLE(lp) \ + ((lp)->master_locker == TXN_INVALID_ID && \ + SH_LIST_FIRST(&(lp)->child_locker, __db_locker) == NULL) + +/* + * DB_LOCKTAB -- + * The primary library lock data structure (i.e., the one referenced + * by the environment, as opposed to the internal one laid out in the region.) + */ +typedef struct __db_locktab { + DB_ENV *dbenv; /* Environment. */ + REGINFO reginfo; /* Region information. */ + u_int8_t *conflicts; /* Pointer to conflict matrix. */ + DB_HASHTAB *obj_tab; /* Beginning of object hash table. */ + DB_HASHTAB *locker_tab; /* Beginning of locker hash table. */ +} DB_LOCKTAB; + +/* Test for conflicts. */ +#define CONFLICTS(T, R, HELD, WANTED) \ + (T)->conflicts[(HELD) * (R)->nmodes + (WANTED)] + +#define OBJ_LINKS_VALID(L) ((L)->links.stqe_prev != -1) + +struct __db_lock { + /* + * Wait on mutex to wait on lock. You reference your own mutex with + * ID 0 and others reference your mutex with ID 1. + */ + MUTEX mutex; + + u_int32_t holder; /* Who holds this lock. */ + u_int32_t gen; /* Generation count. */ + SH_TAILQ_ENTRY links; /* Free or holder/waiter list. */ + SH_LIST_ENTRY locker_links; /* List of locks held by a locker. */ + u_int32_t refcount; /* Reference count the lock. */ + db_lockmode_t mode; /* What sort of lock. */ + ssize_t obj; /* Relative offset of object struct. */ + db_status_t status; /* Status of this lock. */ +}; + +/* + * Flag values for __lock_put_internal: + * DB_LOCK_DOALL: Unlock all references in this lock (instead of only 1). + * DB_LOCK_FREE: Free the lock (used in checklocker). + * DB_LOCK_IGNOREDEL: Remove from the locker hash table even if already + deleted (used in checklocker). + * DB_LOCK_NOPROMOTE: Don't bother running promotion when releasing locks + * (used by __lock_put_internal). + * DB_LOCK_UNLINK: Remove from the locker links (used in checklocker). + */ +#define DB_LOCK_DOALL 0x001 +#define DB_LOCK_FREE 0x002 +#define DB_LOCK_IGNOREDEL 0x004 +#define DB_LOCK_NOPROMOTE 0x008 +#define DB_LOCK_UNLINK 0x010 +#define DB_LOCK_NOWAITERS 0x020 + +/* + * Macros to get/release different types of mutexes. + */ +#define OBJECT_LOCK(lt, reg, obj, ndx) \ + ndx = __lock_ohash(obj) % (reg)->object_t_size +#define SHOBJECT_LOCK(lt, reg, shobj, ndx) \ + ndx = __lock_lhash(shobj) % (reg)->object_t_size +#define LOCKER_LOCK(lt, reg, locker, ndx) \ + ndx = __lock_locker_hash(locker) % (reg)->locker_t_size; + +#define LOCKREGION(dbenv, lt) R_LOCK((dbenv), &(lt)->reginfo) +#define UNLOCKREGION(dbenv, lt) R_UNLOCK((dbenv), &(lt)->reginfo) +#include "lock_ext.h" diff --git a/bdb/include/lock_ext.h b/bdb/include/lock_ext.h new file mode 100644 index 00000000000..7ed9b1c695b --- /dev/null +++ b/bdb/include/lock_ext.h @@ -0,0 +1,39 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _lock_ext_h_ +#define _lock_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __lock_downgrade __P((DB_ENV *, + DB_LOCK *, db_lockmode_t, u_int32_t)); +int __lock_addfamilylocker __P((DB_ENV *, u_int32_t, u_int32_t)); +int __lock_freefamilylocker __P((DB_LOCKTAB *, u_int32_t)); +void __lock_freelocker __P((DB_LOCKTAB *, + DB_LOCKREGION *, DB_LOCKER *, u_int32_t)); +int __lock_getlocker __P((DB_LOCKTAB *, + u_int32_t, u_int32_t, int, DB_LOCKER **)); +int __lock_getobj __P((DB_LOCKTAB *, + const DBT *, u_int32_t, int, DB_LOCKOBJ **)); +int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *, int)); +void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); +int __lock_set_lk_conflicts __P((DB_ENV *, u_int8_t *, int)); +int __lock_set_lk_detect __P((DB_ENV *, u_int32_t)); +int __lock_set_lk_max __P((DB_ENV *, u_int32_t)); +int __lock_set_lk_max_locks __P((DB_ENV *, u_int32_t)); +int __lock_set_lk_max_lockers __P((DB_ENV *, u_int32_t)); +int __lock_set_lk_max_objects __P((DB_ENV *, u_int32_t)); +void __lock_dbenv_create __P((DB_ENV *)); +void __lock_dbenv_close __P((DB_ENV *)); +int __lock_open __P((DB_ENV *)); +int __lock_close __P((DB_ENV *)); +void __lock_region_destroy __P((DB_ENV *, REGINFO *)); +void __lock_dump_region __P((DB_ENV *, char *, FILE *)); +int __lock_cmp __P((const DBT *, DB_LOCKOBJ *)); +int __lock_locker_cmp __P((u_int32_t, DB_LOCKER *)); +u_int32_t __lock_ohash __P((const DBT *)); +u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); +u_int32_t __lock_locker_hash __P((u_int32_t)); +#if defined(__cplusplus) +} +#endif +#endif /* _lock_ext_h_ */ diff --git a/bdb/include/log.h b/bdb/include/log.h new file mode 100644 index 00000000000..08c2b8076be --- /dev/null +++ b/bdb/include/log.h @@ -0,0 +1,208 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: log.h,v 11.19 2001/01/11 18:19:52 bostic Exp $ + */ + +#ifndef _LOG_H_ +#define _LOG_H_ + +struct __db_log; typedef struct __db_log DB_LOG; +struct __fname; typedef struct __fname FNAME; +struct __hdr; typedef struct __hdr HDR; +struct __log; typedef struct __log LOG; +struct __log_persist; typedef struct __log_persist LOGP; + +#define LFPREFIX "log." /* Log file name prefix. */ +#define LFNAME "log.%010d" /* Log file name template. */ +#define LFNAME_V1 "log.%05d" /* Log file name template, rev 1. */ + +#define LG_MAX_DEFAULT (10 * MEGABYTE) /* 10 MB. */ +#define LG_BSIZE_DEFAULT (32 * 1024) /* 32 KB. */ +#define LG_BASE_REGION_SIZE (60 * 1024) /* 60 KB. */ + +/* + * The per-process table that maps log file-id's to DB structures. + */ +typedef struct __db_entry { + TAILQ_HEAD(dblist, __db) dblist; /* Associated DB structures. */ + u_int32_t refcount; /* Reference counted. */ + u_int32_t count; /* Number of ops on a deleted db. */ + int deleted; /* File was not found during open. */ +} DB_ENTRY; + +/* + * DB_LOG + * Per-process log structure. + */ +struct __db_log { +/* + * These fields need to be protected for multi-threaded support. + * + * !!! + * As this structure is allocated in per-process memory, the mutex may need + * to be stored elsewhere on architectures unable to support mutexes in heap + * memory, e.g., HP/UX 9. + */ + MUTEX *mutexp; /* Mutex for thread protection. */ + + DB_ENTRY *dbentry; /* Recovery file-id mapping. */ +#define DB_GROW_SIZE 64 + int32_t dbentry_cnt; /* Entries. Grows by DB_GROW_SIZE. 
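+ * The dbentry array is the recovery-time map from a log file id to its
+ * DB handles (see DB_ENTRY above); it is reallocated in DB_GROW_SIZE
+ * increments as new file ids are encountered.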
*/ + +/* + * These fields are always accessed while the region lock is held, so they do + * not have to be protected by the thread lock as well, OR, they are only used + * when threads are not being used, i.e. most cursor operations are disallowed + * on threaded logs. + */ + u_int32_t lfname; /* Log file "name". */ + DB_FH lfh; /* Log file handle. */ + + DB_LSN c_lsn; /* Cursor: current LSN. */ + DBT c_dbt; /* Cursor: return DBT structure. */ + DB_FH c_fh; /* Cursor: file handle. */ + FILE *c_fp; /* Cursor: file pointer. */ + u_int32_t c_off; /* Cursor: previous record offset. */ + u_int32_t c_len; /* Cursor: current record length. */ + u_int32_t r_file; /* Cursor: current read file */ + u_int32_t r_off; /* Cursor: offset of read buffer. */ + u_int32_t r_size; /* Cursor: size of data in read buf. */ + + u_int8_t *bufp; /* Region buffer. */ + u_int8_t *readbufp; /* Read buffer. */ + +/* These fields are not protected. */ + DB_ENV *dbenv; /* Reference to error information. */ + REGINFO reginfo; /* Region information. */ + +/* + * These fields are used by XA; since XA forbids threaded execution, these + * do not have to be protected. + */ + void *xa_info; /* Committed transaction list that + * has to be carried between calls + * to xa_recover. */ + DB_LSN xa_lsn; /* Position of an XA recovery scan. */ + DB_LSN xa_first; /* LSN to which we need to roll back + for this XA recovery scan. */ + +#define DBLOG_RECOVER 0x01 /* We are in recovery. */ +#define DBLOG_FORCE_OPEN 0x02 /* Force the db open even + * if it appears to be deleted. + */ + u_int32_t flags; +}; + +/* + * HDR -- + * Log record header. + */ +struct __hdr { + u_int32_t prev; /* Previous offset. */ + u_int32_t cksum; /* Current checksum. */ + u_int32_t len; /* Current length. */ +}; + +struct __log_persist { + u_int32_t magic; /* DB_LOGMAGIC */ + u_int32_t version; /* DB_LOGVERSION */ + + u_int32_t lg_max; /* Maximum file size. */ + int mode; /* Log file mode. */ +}; + +/* + * LOG -- + * Shared log region. One of these is allocated in shared memory, + * and describes the log. + */ +struct __log { + LOGP persist; /* Persistent information. */ + + SH_TAILQ_HEAD(__fq) fq; /* List of file names. */ + + /* + * The lsn LSN is the file offset that we're about to write and which + * we will return to the user. + */ + DB_LSN lsn; /* LSN at current file offset. */ + + /* + * The s_lsn LSN is the last LSN that we know is on disk, not just + * written, but synced. + */ + DB_LSN s_lsn; /* LSN of the last sync. */ + + u_int32_t len; /* Length of the last record. */ + + u_int32_t w_off; /* Current write offset in the file. */ + + DB_LSN chkpt_lsn; /* LSN of the last checkpoint. */ + time_t chkpt; /* Time of the last checkpoint. */ + + DB_LOG_STAT stat; /* Log statistics. */ + + /* + * The f_lsn LSN is the LSN (returned to the user) that "owns" the + * first byte of the buffer. If the record associated with the LSN + * spans buffers, it may not reflect the physical file location of + * the first byte of the buffer. + */ + DB_LSN f_lsn; /* LSN of first byte in the buffer. */ + size_t b_off; /* Current offset in the buffer. */ + + roff_t buffer_off; /* Log buffer offset. */ + u_int32_t buffer_size; /* Log buffer size. */ +}; + +/* + * FNAME -- + * File name and id. + */ +struct __fname { + SH_TAILQ_ENTRY q; /* File name queue. */ + + u_int16_t ref; /* Reference count. */ + u_int16_t locked; /* Table is locked. */ + + int32_t id; /* Logging file id. */ + DBTYPE s_type; /* Saved DB type. */ + + roff_t name_off; /* Name offset. 
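+ * (a region offset to the file name string; together with
+ * meta_pgno and ufid below it identifies the named file)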
*/ + db_pgno_t meta_pgno; /* Page number of the meta page. */ + u_int8_t ufid[DB_FILE_ID_LEN]; /* Unique file id. */ +}; + +/* File open/close register log record opcodes. */ +#define LOG_CHECKPOINT 1 /* Checkpoint: file name/id dump. */ +#define LOG_CLOSE 2 /* File close. */ +#define LOG_OPEN 3 /* File open. */ + +#define CHECK_LSN(redo, cmp, lsn, prev) \ + DB_ASSERT(!DB_REDO(redo) || (cmp) >= 0); \ + if (DB_REDO(redo) && (cmp) < 0) { \ + __db_err(dbenv, \ + "Log sequence error: page LSN %lu:%lu; previous LSN %lu %lu", \ + (u_long)(lsn)->file, (u_long)(lsn)->offset, \ + (u_long)(prev)->file, (u_long)(prev)->offset); \ + goto out; \ + } + +/* + * Status codes indicating the validity of a log file examined by + * __log_valid(). + */ +typedef enum { + DB_LV_INCOMPLETE, + DB_LV_NORMAL, + DB_LV_OLD_READABLE, + DB_LV_OLD_UNREADABLE +} logfile_validity; + +#include "log_auto.h" +#include "log_ext.h" +#endif /* _LOG_H_ */ diff --git a/bdb/include/log_auto.h b/bdb/include/log_auto.h new file mode 100644 index 00000000000..ddbcbcb3ec6 --- /dev/null +++ b/bdb/include/log_auto.h @@ -0,0 +1,39 @@ +/* Do not edit: automatically built by gen_rec.awk. */ + +#ifndef log_AUTO_H +#define log_AUTO_H + +#define DB_log_register1 1 +typedef struct _log_register1_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT name; + DBT uid; + int32_t fileid; + DBTYPE ftype; +} __log_register1_args; + +int __log_register1_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __log_register1_read __P((DB_ENV *, void *, __log_register1_args **)); + +#define DB_log_register 2 +typedef struct _log_register_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT name; + DBT uid; + int32_t fileid; + DBTYPE ftype; + db_pgno_t meta_pgno; +} __log_register_args; + +int __log_register_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, const DBT *, const DBT *, int32_t, DBTYPE, db_pgno_t)); +int __log_register_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __log_register_read __P((DB_ENV *, void *, __log_register_args **)); +int __log_init_print __P((DB_ENV *)); +int __log_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/log_ext.h b/bdb/include/log_ext.h new file mode 100644 index 00000000000..985c5d7745b --- /dev/null +++ b/bdb/include/log_ext.h @@ -0,0 +1,33 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _log_ext_h_ +#define _log_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __log_open __P((DB_ENV *)); +int __log_find __P((DB_LOG *, int, int *, logfile_validity *)); +int __log_valid __P((DB_LOG *, u_int32_t, int, logfile_validity *)); +int __log_close __P((DB_ENV *)); +int __log_lastckp __P((DB_ENV *, DB_LSN *)); +int __log_findckp __P((DB_ENV *, DB_LSN *)); +int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); +void __log_dbenv_create __P((DB_ENV *)); +int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t)); +int __log_name __P((DB_LOG *, + u_int32_t, char **, DB_FH *, u_int32_t)); +int __log_register_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __log_reopen_file __P((DB_ENV *, + char *, int32_t, u_int8_t *, db_pgno_t)); +int __log_add_logid __P((DB_ENV *, DB_LOG *, DB *, int32_t)); +int __db_fileid_to_db __P((DB_ENV *, DB **, int32_t, int)); +void __log_close_files __P((DB_ENV *)); +void __log_rem_logid __P((DB_LOG *, DB *, int32_t)); +int __log_lid_to_fname __P((DB_LOG *, int32_t, FNAME **)); +int __log_filelist_update + __P((DB_ENV *, DB *, int32_t, const char *, int *)); +int __log_file_lock __P((DB *)); +#if defined(__cplusplus) +} +#endif +#endif /* _log_ext_h_ */ diff --git a/bdb/include/mp.h b/bdb/include/mp.h new file mode 100644 index 00000000000..233cb1c2b10 --- /dev/null +++ b/bdb/include/mp.h @@ -0,0 +1,244 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: mp.h,v 11.16 2001/01/10 04:50:53 ubell Exp $ + */ + +struct __bh; typedef struct __bh BH; +struct __db_mpool; typedef struct __db_mpool DB_MPOOL; +struct __db_mpreg; typedef struct __db_mpreg DB_MPREG; +struct __mpool; typedef struct __mpool MPOOL; +struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; + +/* We require at least 40K of cache. */ +#define DB_CACHESIZE_MIN (20 * 1024) + +/* + * DB_MPOOL -- + * Per-process memory pool structure. + */ +struct __db_mpool { + /* These fields need to be protected for multi-threaded support. */ + MUTEX *mutexp; /* Structure thread lock. */ + + /* List of pgin/pgout routines. */ + LIST_HEAD(__db_mpregh, __db_mpreg) dbregq; + + /* List of DB_MPOOLFILE's. */ + TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq; + + /* These fields are not thread-protected. */ + DB_ENV *dbenv; /* Reference to error information. */ + + u_int32_t nreg; /* N underlying cache regions. */ + REGINFO *reginfo; /* Underlying cache regions. */ +}; + +/* + * DB_MPREG -- + * DB_MPOOL registry of pgin/pgout functions. + */ +struct __db_mpreg { + LIST_ENTRY(__db_mpreg) q; /* Linked list. */ + + int ftype; /* File type. */ + /* Pgin, pgout routines. */ + int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *)); + int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *)); +}; + +/* + * DB_MPOOLFILE -- + * Per-process DB_MPOOLFILE information. + */ +struct __db_mpoolfile { + /* These fields need to be protected for multi-threaded support. */ + MUTEX *mutexp; /* Structure thread lock. */ + + DB_FH fh; /* Underlying file handle. */ + + u_int32_t ref; /* Reference count. */ + + /* + * !!! + * This field is a special case -- it's protected by the region lock + * NOT the thread lock. 
The reason for this is that we always have + * the region lock immediately before or after we modify the field, + * and we don't want to use the structure lock to protect it because + * then I/O (which is done with the structure lock held because of + * the race between the seek and write of the file descriptor) will + * block any other put/get calls using this DB_MPOOLFILE structure. + */ + u_int32_t pinref; /* Pinned block reference count. */ + + /* + * !!! + * This field is a special case -- it's protected by the region lock + * since it's manipulated only when new files are added to the list. + */ + TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */ + + /* These fields are not thread-protected. */ + DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */ + MPOOLFILE *mfp; /* Underlying MPOOLFILE. */ + + void *addr; /* Address of mmap'd region. */ + size_t len; /* Length of mmap'd region. */ + + /* These fields need to be protected for multi-threaded support. */ +#define MP_READONLY 0x01 /* File is readonly. */ +#define MP_UPGRADE 0x02 /* File descriptor is readwrite. */ +#define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. */ + u_int32_t flags; +}; + +/* + * NCACHE -- + * Select a cache based on the page number. This assumes accesses are + * uniform across pages, which is probably OK -- what we really want to + * avoid is anything that puts all the pages for any single file in the + * same cache, as we expect that file access will be bursty. + */ +#define NCACHE(mp, pgno) \ + ((pgno) % ((MPOOL *)mp)->nreg) + +/* + * NBUCKET -- + * We make the assumption that early pages of the file are more likely + * to be retrieved than the later pages, which means the top bits will + * be more interesting for hashing as they're less likely to collide. + * That said, as 512 8K pages represents a 4MB file, so only reasonably + * large files will have page numbers with any other than the bottom 9 + * bits set. We XOR in the MPOOL offset of the MPOOLFILE that backs the + * page, since that should also be unique for the page. We don't want + * to do anything very fancy -- speed is more important to us than using + * good hashing. + */ +#define NBUCKET(mc, mf_offset, pgno) \ + (((pgno) ^ ((mf_offset) << 9)) % (mc)->htab_buckets) + +/* + * MPOOL -- + * Shared memory pool region. + */ +struct __mpool { + /* + * The memory pool can be broken up into individual pieces/files. + * Not what we would have liked, but on Solaris you can allocate + * only a little more than 2GB of memory in a contiguous chunk, + * and I expect to see more systems with similar issues. + * + * The first of these pieces/files describes the entire pool, all + * subsequent ones only describe a part of the cache. + * + * We single-thread memp_sync and memp_fsync calls. + * + * This mutex is intended *only* to single-thread access to the call, + * it is not used to protect the lsn and lsn_cnt fields, the region + * lock is used to protect them. + */ + MUTEX sync_mutex; /* Checkpoint lock. */ + DB_LSN lsn; /* Maximum checkpoint LSN. */ + u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */ + + SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */ + + u_int32_t nreg; /* Number of underlying REGIONS. */ + roff_t regids; /* Array of underlying REGION Ids. */ + +#define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */ + u_int32_t flags; + + /* + * The following structure fields only describe the cache portion of + * the region. + */ + SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buffer headers. 
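+ * Buffers with no remaining references are reclaimed
+ * from the head of this queue, giving approximate
+ * LRU replacement.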
*/ + + int htab_buckets; /* Number of hash table entries. */ + roff_t htab; /* Hash table offset. */ + + DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */ +#ifdef MUTEX_SYSTEM_RESOURCES + roff_t maint_off; /* Maintenance information offset */ +#endif +}; + +/* + * MPOOLFILE -- + * Shared DB_MPOOLFILE information. + */ +struct __mpoolfile { + SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */ + + db_pgno_t mpf_cnt; /* Ref count: DB_MPOOLFILEs. */ + db_pgno_t block_cnt; /* Ref count: blocks in cache. */ + db_pgno_t lsn_cnt; /* Checkpoint buffers left to write. */ + + int ftype; /* File type. */ + int32_t lsn_off; /* Page's LSN offset. */ + u_int32_t clear_len; /* Bytes to clear on page create. */ + + roff_t path_off; /* File name location. */ + roff_t fileid_off; /* File identification location. */ + + roff_t pgcookie_len; /* Pgin/pgout cookie length. */ + roff_t pgcookie_off; /* Pgin/pgout cookie location. */ + + db_pgno_t last_pgno; /* Last page in the file. */ + db_pgno_t orig_last_pgno; /* Original last page in the file. */ + + DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */ + +#define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */ +#define MP_DEADFILE 0x02 /* Dirty pages can simply be trashed. */ +#define MP_TEMP 0x04 /* Backing file is a temporary. */ +#define MP_UNLINK 0x08 /* Unlink file on last close. */ + u_int32_t flags; +}; + +/* + * BH_TO_CACHE -- + * Return the cache where we can find the specified buffer header. + */ +#define BH_TO_CACHE(dbmp, bhp) \ + (dbmp)->reginfo[NCACHE((dbmp)->reginfo[0].primary, (bhp)->pgno)].primary + +/* + * BH -- + * Buffer header. + */ +struct __bh { + MUTEX mutex; /* Buffer thread/process lock. */ + + u_int16_t ref; /* Reference count. */ + +#define BH_CALLPGIN 0x001 /* Page needs to be reworked... */ +#define BH_DIRTY 0x002 /* Page was modified. */ +#define BH_DISCARD 0x004 /* Page is useless. */ +#define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */ +#define BH_SYNC 0x010 /* memp sync: write the page */ +#define BH_SYNC_LOGFLSH 0x020 /* memp sync: also flush the log */ +#define BH_TRASH 0x040 /* Page is garbage. */ + u_int16_t flags; + + SH_TAILQ_ENTRY q; /* LRU queue. */ + SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */ + + db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ + roff_t mf_offset; /* Associated MPOOLFILE offset. */ + + /* + * !!! + * This array must be at least size_t aligned -- the DB access methods + * put PAGE and other structures into it, and then access them directly. + * (We guarantee size_t alignment to applications in the documentation, + * too.) + */ + u_int8_t buf[1]; /* Variable length data. */ +}; + +#include "mp_ext.h" diff --git a/bdb/include/mp_ext.h b/bdb/include/mp_ext.h new file mode 100644 index 00000000000..9f2b8c61f45 --- /dev/null +++ b/bdb/include/mp_ext.h @@ -0,0 +1,33 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _mp_ext_h_ +#define _mp_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __memp_alloc __P((DB_MPOOL *, + REGINFO *, MPOOLFILE *, size_t, roff_t *, void *)); +int __memp_bhwrite + __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *)); +int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); +int __memp_pgwrite + __P((DB_MPOOL *, DB_MPOOLFILE *, BH *, int *, int *)); +int __memp_pg __P((DB_MPOOLFILE *, BH *, int)); +void __memp_bhfree __P((DB_MPOOL *, BH *, int)); +void __memp_set_unlink __P((DB_MPOOLFILE *)); +void __memp_clear_unlink __P((DB_MPOOLFILE *)); +int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, + u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); +void __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *)); +int __memp_fremove __P((DB_MPOOLFILE *)); +char * __memp_fn __P((DB_MPOOLFILE *)); +char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); +void __memp_dbenv_create __P((DB_ENV *)); +int __memp_open __P((DB_ENV *)); +int __memp_close __P((DB_ENV *)); +void __mpool_region_destroy __P((DB_ENV *, REGINFO *)); +void __memp_dump_region __P((DB_ENV *, char *, FILE *)); +int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **)); +#if defined(__cplusplus) +} +#endif +#endif /* _mp_ext_h_ */ diff --git a/bdb/include/mutex.h b/bdb/include/mutex.h new file mode 100644 index 00000000000..a8a41451012 --- /dev/null +++ b/bdb/include/mutex.h @@ -0,0 +1,744 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: mutex.h,v 11.41 2000/12/22 19:28:15 bostic Exp $ + */ + +/* + * Some of the Berkeley DB ports require single-threading at various + * places in the code. In those cases, these #defines will be set. + */ +#define DB_BEGIN_SINGLE_THREAD +#define DB_END_SINGLE_THREAD + +/* + * When the underlying system mutexes require system resources, we have + * to clean up after application failure. This violates the rule that + * we never look at a shared region after a failure, but there's no other + * choice. In those cases, this #define is set. + */ +#ifdef HAVE_QNX +#define MUTEX_SYSTEM_RESOURCES +#endif + +/********************************************************************* + * POSIX.1 pthreads interface. + *********************************************************************/ +#ifdef HAVE_MUTEX_PTHREADS +#include <pthread.h> + +#define MUTEX_FIELDS \ + pthread_mutex_t mutex; /* Mutex. */ \ + pthread_cond_t cond; /* Condition variable. */ +#endif + +/********************************************************************* + * Solaris lwp threads interface. + * + * !!! + * We use LWP mutexes on Solaris instead of UI or POSIX mutexes (both of + * which are available), for two reasons. First, the Solaris C library + * includes versions of the both UI and POSIX thread mutex interfaces, but + * they are broken in that they don't support inter-process locking, and + * there's no way to detect it, e.g., calls to configure the mutexes for + * inter-process locking succeed without error. So, we use LWP mutexes so + * that we don't fail in fairly undetectable ways because the application + * wasn't linked with the appropriate threads library. Second, there were + * bugs in SunOS 5.7 (Solaris 7) where if an application loaded the C library + * before loading the libthread/libpthread threads libraries (e.g., by using + * dlopen to load the DB library), the pwrite64 interface would be translated + * into a call to pwrite and DB would drop core. 
+ *********************************************************************/ +#ifdef HAVE_MUTEX_SOLARIS_LWP +/* + * XXX + * Don't change <synch.h> to <sys/lwp.h> -- although lwp.h is listed in the + * Solaris manual page as the correct include to use, it causes the Solaris + * compiler on SunOS 2.6 to fail. + */ +#include <synch.h> + +#define MUTEX_FIELDS \ + lwp_mutex_t mutex; /* Mutex. */ \ + lwp_cond_t cond; /* Condition variable. */ +#endif + +/********************************************************************* + * Solaris/Unixware threads interface. + *********************************************************************/ +#ifdef HAVE_MUTEX_UI_THREADS +#include <thread.h> +#include <synch.h> + +#define MUTEX_FIELDS \ + mutex_t mutex; /* Mutex. */ \ + cond_t cond; /* Condition variable. */ +#endif + +/********************************************************************* + * AIX C library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_AIX_CHECK_LOCK +#include <sys/atomic_op.h> +typedef int tsl_t; +#define MUTEX_ALIGN sizeof(int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) (!_check_lock(x, 0, 1)) +#define MUTEX_UNSET(x) _clear_lock(x, 0) +#endif +#endif + +/********************************************************************* + * General C library functions (msemaphore). + * + * !!! + * Check for HPPA as a special case, because it requires unusual alignment, + * and doesn't support semaphores in malloc(3) or shmget(2) memory. + * + * !!! + * Do not remove the MSEM_IF_NOWAIT flag. The problem is that if a single + * process makes two msem_lock() calls in a row, the second one returns an + * error. We depend on the fact that we can lock against ourselves in the + * locking subsystem, where we set up a mutex so that we can block ourselves. + * Tested on OSF1 v4.0. + *********************************************************************/ +#ifdef HAVE_MUTEX_HPPA_MSEM_INIT +#define MUTEX_NO_MALLOC_LOCKS +#define MUTEX_NO_SHMGET_LOCKS + +#define MUTEX_ALIGN 16 +#endif + +#if defined(HAVE_MUTEX_MSEM_INIT) || defined(HAVE_MUTEX_HPPA_MSEM_INIT) +#include <sys/mman.h> +typedef msemaphore tsl_t; + +#ifndef MUTEX_ALIGN +#define MUTEX_ALIGN sizeof(int) +#endif + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (msem_init(x, MSEM_UNLOCKED) <= (msemaphore *)0) +#define MUTEX_SET(x) (!msem_lock(x, MSEM_IF_NOWAIT)) +#define MUTEX_UNSET(x) msem_unlock(x, 0) +#endif +#endif + +/********************************************************************* + * Plan 9 library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_PLAN9 +typedef Lock tsl_t; + +#define MUTEX_ALIGN sizeof(int) + +#define MUTEX_INIT(x) (memset(x, 0, sizeof(Lock)), 0) +#define MUTEX_SET(x) canlock(x) +#define MUTEX_UNSET(x) unlock(x) +#endif + +/********************************************************************* + * Reliant UNIX C library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_RELIANTUNIX_INITSPIN +#include <ulocks.h> +typedef spinlock_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (initspin(x, 1), 0) +#define MUTEX_SET(x) (cspinlock(x) == 0) +#define MUTEX_UNSET(x) spinunlock(x) +#endif +#endif + +/********************************************************************* + * General C library functions (POSIX 1003.1 sema_XXX). + * + * !!! 
+ * Never selected by autoconfig in this release (semaphore calls are known + * to not work in Solaris 5.5). + *********************************************************************/ +#ifdef HAVE_MUTEX_SEMA_INIT +#include <synch.h> +typedef sema_t tsl_t; +#define MUTEX_ALIGN sizeof(int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_DESTROY(x) sema_destroy(x) +#define MUTEX_INIT(x) (sema_init(x, 1, USYNC_PROCESS, NULL) != 0) +#define MUTEX_SET(x) (sema_wait(x) == 0) +#define MUTEX_UNSET(x) sema_post(x) +#endif +#endif + +/********************************************************************* + * SGI C library functions. + *********************************************************************/ +#ifdef HAVE_MUTEX_SGI_INIT_LOCK +#include <abi_mutex.h> +typedef abilock_t tsl_t; +#define MUTEX_ALIGN sizeof(int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) (init_lock(x) != 0) +#define MUTEX_SET(x) (!acquire_lock(x)) +#define MUTEX_UNSET(x) release_lock(x) +#endif +#endif + +/********************************************************************* + * Solaris C library functions. + * + * !!! + * These are undocumented functions, but they're the only ones that work + * correctly as far as we know. + *********************************************************************/ +#ifdef HAVE_MUTEX_SOLARIS_LOCK_TRY +#include <sys/machlock.h> +typedef lock_t tsl_t; +#define MUTEX_ALIGN sizeof(int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) _lock_try(x) +#define MUTEX_UNSET(x) _lock_clear(x) +#endif +#endif + +/********************************************************************* + * VMS. + *********************************************************************/ +#ifdef HAVE_MUTEX_VMS +#include <sys/mman.h>; +#include <builtins.h> +typedef unsigned char tsl_t; +#define MUTEX_ALIGN sizeof(unsigned int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#ifdef __ALPHA +#define MUTEX_SET(tsl) (!__TESTBITSSI(tsl, 0)) +#else /* __VAX */ +#define MUTEX_SET(tsl) (!(int)_BBSSI(0, tsl)) +#endif +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * VxWorks + * Use basic binary semaphores in VxWorks, as we currently do not need + * any special features. We do need the ability to single-thread the + * entire system, however, because VxWorks doesn't support the open(2) + * flag O_EXCL, the mechanism we normally use to single thread access + * when we're first looking for a DB environment. + *********************************************************************/ +#ifdef HAVE_MUTEX_VXWORKS +#define MUTEX_SYSTEM_RESOURCES + +#include "semLib.h" +typedef SEM_ID tsl_t; +#define MUTEX_ALIGN sizeof(unsigned int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_SET(tsl) (semTake((*tsl), WAIT_FOREVER) == OK) +#define MUTEX_UNSET(tsl) (semGive((*tsl)) == OK) +#define MUTEX_INIT(tsl) \ + ((*(tsl) = semBCreate(SEM_Q_FIFO, SEM_FULL)) == NULL) +#define MUTEX_DESTROY(tsl) semDelete(*tsl) +#endif + +/* + * Use the taskLock() mutex to eliminate a race where two tasks are + * trying to initialize the global lock at the same time. 
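+ * The macro below is, in effect, double-checked initialization: a task
+ * that finds db_global_init already set just blocks in semTake();
+ * otherwise it takes taskLock(), re-checks the flag, creates the binary
+ * semaphore empty (i.e., already held by the creating task) and sets the
+ * flag before calling taskUnlock().  DB_END_SINGLE_THREAD releases the
+ * semaphore again with semGive().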
+ */ +#undef DB_BEGIN_SINGLE_THREAD +#define DB_BEGIN_SINGLE_THREAD \ +do { \ + if (DB_GLOBAL(db_global_init)) \ + (void)semTake(DB_GLOBAL(db_global_lock), WAIT_FOREVER); \ + else { \ + taskLock(); \ + if (DB_GLOBAL(db_global_init)) { \ + taskUnlock(); \ + (void)semTake(DB_GLOBAL(db_global_lock), \ + WAIT_FOREVER); \ + continue; \ + } \ + DB_GLOBAL(db_global_lock) = \ + semBCreate(SEM_Q_FIFO, SEM_EMPTY); \ + if (DB_GLOBAL(db_global_lock) != NULL) \ + DB_GLOBAL(db_global_init) = 1; \ + taskUnlock(); \ + } \ +} while (DB_GLOBAL(db_global_init) == 0) +#undef DB_END_SINGLE_THREAD +#define DB_END_SINGLE_THREAD (void)semGive(DB_GLOBAL(db_global_lock)) +#endif + +/********************************************************************* + * Win16 + * + * Win16 spinlocks are simple because we cannot possibly be preempted. + * + * !!! + * We should simplify this by always returning a no-need-to-lock lock + * when we initialize the mutex. + *********************************************************************/ +#ifdef HAVE_MUTEX_WIN16 +typedef unsigned int tsl_t; +#define MUTEX_ALIGN sizeof(unsigned int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(tsl) (*(tsl) = 1) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#endif +#endif + +/********************************************************************* + * Win32 + *********************************************************************/ +#ifdef HAVE_MUTEX_WIN32 +typedef unsigned int tsl_t; +#define MUTEX_ALIGN sizeof(unsigned int) + +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(tsl) (!InterlockedExchange((PLONG)tsl, 1)) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#endif +#endif + +/********************************************************************* + * 68K/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_68K_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/68K, 0 is clear, 1 is set. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + int __r; \ + asm volatile("tas %1; \n \ + seq %0" \ + : "=dm" (__r), "=m" (*__l) \ + : "1" (*__l) \ + ); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * ALPHA/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_ALPHA_GCC_ASSEMBLY +typedef u_int32_t tsl_t; +#define MUTEX_ALIGN 4 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/alpha. Should return 0 if could not acquire the lock, 1 if + * lock was acquired properly. + */ +#ifdef __GNUC__ +static inline int +MUTEX_SET(tsl_t *tsl) { + register tsl_t *__l = tsl; + register tsl_t __r; + asm volatile( + "1: ldl_l %0,%2\n" + " blbs %0,2f\n" + " or $31,1,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + " mb\n" + " br 3f\n" + "2: xor %0,%0\n" + "3:" + : "=&r"(__r), "=m"(*__l) : "1"(*__l) : "memory"); + return __r; +} + +/* + * Unset mutex. 
Judging by Alpha Architecture Handbook, the mb instruction + * might be necessary before unlocking + */ +static inline int +MUTEX_UNSET(tsl_t *tsl) { + asm volatile(" mb\n"); + return *tsl = 0; +} +#endif + +#ifdef __DECC +#include <alpha/builtins.h> +#define MUTEX_SET(tsl) (__LOCK_LONG_RETRY((tsl), 1) != 0) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#endif + +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * HPPA/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_HPPA_GCC_ASSEMBLY +typedef u_int32_t tsl_t; +#define MUTEX_ALIGN 16 + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * The PA-RISC has a "load and clear" instead of a "test and set" instruction. + * The 32-bit word used by that instruction must be 16-byte aligned. We could + * use the "aligned" attribute in GCC but that doesn't work for stack variables. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + int __r; \ + asm volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l)); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = -1) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * IA64/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_IA64_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/ia64, 0 is clear, 1 is set. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + long __r; \ + asm volatile("xchg1 %0=%1,%3" : "=r"(__r), "=m"(*__l) : "1"(*__l), "r"(1));\ + __r ^ 1; \ +}) + +/* + * Store through a "volatile" pointer so we get a store with "release" + * semantics. + */ +#define MUTEX_UNSET(tsl) (*(volatile unsigned char *)(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * PowerPC/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_PPC_GCC_ASSEMBLY +typedef u_int32_t tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * The PowerPC does a sort of pseudo-atomic locking. You set up a + * 'reservation' on a chunk of memory containing a mutex by loading the + * mutex value with LWARX. If the mutex has an 'unlocked' (arbitrary) + * value, you then try storing into it with STWCX. If no other process or + * thread broke your 'reservation' by modifying the memory containing the + * mutex, then the STCWX succeeds; otherwise it fails and you try to get + * a reservation again. + * + * While mutexes are explicitly 4 bytes, a 'reservation' applies to an + * entire cache line, normally 32 bytes, aligned naturally. If the mutex + * lives near data that gets changed a lot, there's a chance that you'll + * see more broken reservations than you might otherwise. The only + * situation in which this might be a problem is if one processor is + * beating on a variable in the same cache block as the mutex while another + * processor tries to acquire the mutex. That's bad news regardless + * because of the way it bashes caches, but if you can't guarantee that a + * mutex will reside in a relatively quiescent cache line, you might + * consider padding the mutex to force it to live in a cache line by + * itself. No, you aren't guaranteed that cache lines are 32 bytes. 
Some + * embedded processors use 16-byte cache lines, while some 64-bit + * processors use 128-bit cache lines. But assuming a 32-byte cache line + * won't get you into trouble for now. + * + * If mutex locking is a bottleneck, then you can speed it up by adding a + * regular LWZ load before the LWARX load, so that you can test for the + * common case of a locked mutex without wasting cycles making a reservation. + * + * 'set' mutexes have the value 1, like on Intel; the returned value from + * MUTEX_SET() is 1 if the mutex previously had its low bit set, 0 otherwise. + */ +#define MUTEX_SET(tsl) ({ \ + int __one = 1; \ + int __r; \ + tsl_t *__l = (tsl); \ + asm volatile (" \ +0: \ + lwarx %0,0,%1; \ + cmpwi %0,0; \ + bne 1f; \ + stwcx. %2,0,%1; \ + bne- 0b; \ +1:" \ + : "=&r" (__r) \ + : "r" (__l), "r" (__one)); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * SCO/cc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_SCO_X86_CC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * UnixWare has threads in libthread, but OpenServer doesn't (yet). + * + * For cc/x86, 0 is clear, 1 is set. + */ + +#if defined(__USLC__) +asm int +_tsl_set(void *tsl) +{ +%mem tsl + movl tsl, %ecx + movl $1, %eax + lock + xchgb (%ecx),%al + xorl $1,%eax +} +#endif + +#define MUTEX_SET(tsl) _tsl_set(tsl) +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * Sparc/gcc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_SPARC_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * + * The ldstub instruction takes the location specified by its first argument + * (a register containing a memory address) and loads its contents into its + * second argument (a register) and atomically sets the contents the location + * specified by its first argument to a byte of 1s. (The value in the second + * argument is never read, but only overwritten.) + * + * The stbar is needed for v8, and is implemented as membar #sync on v9, + + so is functional there as well. For v7, stbar may generate an illegal + + instruction and we have no way to tell what we're running on. Some + + operating systems notice and skip this instruction in the fault handler. + * + * For gcc/sparc, 0 is clear, 1 is set. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + register tsl_t __r; \ + __asm__ volatile \ + ("ldstub [%1],%0; stbar" \ + : "=r"( __r) : "r" (__l)); \ + !__r; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/********************************************************************* + * UTS/cc assembly. + *********************************************************************/ +#ifdef HAVE_MUTEX_UTS_CC_ASSEMBLY +typedef int tsl_t; + +#define MUTEX_ALIGN sizeof(int) +#ifdef LOAD_ACTUAL_MUTEX_CODE +#define MUTEX_INIT(x) 0 +#define MUTEX_SET(x) (!uts_lock(x, 1)) +#define MUTEX_UNSET(x) (*(x) = 0) +#endif +#endif + +/********************************************************************* + * x86/gcc assembly. 
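+ * (xchg with a memory operand is implicitly locking on x86, so the
+ * explicit lock prefix in MUTEX_SET below is redundant but harmless.)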
+ *********************************************************************/ +#ifdef HAVE_MUTEX_X86_GCC_ASSEMBLY +typedef unsigned char tsl_t; + +#ifdef LOAD_ACTUAL_MUTEX_CODE +/* + * For gcc/x86, 0 is clear, 1 is set. + */ +#define MUTEX_SET(tsl) ({ \ + register tsl_t *__l = (tsl); \ + int __r; \ + asm volatile("movl $1,%%eax; lock; xchgb %1,%%al; xorl $1,%%eax"\ + : "=&a" (__r), "=m" (*__l) \ + : "1" (*__l) \ + ); \ + __r & 1; \ +}) + +#define MUTEX_UNSET(tsl) (*(tsl) = 0) +#define MUTEX_INIT(tsl) MUTEX_UNSET(tsl) +#endif +#endif + +/* + * Mutex alignment defaults to one byte. + * + * !!! + * Various systems require different alignments for mutexes (the worst we've + * seen so far is 16-bytes on some HP architectures). Malloc(3) is assumed + * to return reasonable alignment, all other mutex users must ensure proper + * alignment locally. + */ +#ifndef MUTEX_ALIGN +#define MUTEX_ALIGN 1 +#endif + +/* + * Mutex destruction defaults to a no-op. + */ +#ifdef LOAD_ACTUAL_MUTEX_CODE +#ifndef MUTEX_DESTROY +#define MUTEX_DESTROY(x) +#endif +#endif + +#define MUTEX_IGNORE 0x001 /* Ignore, no lock required. */ +#define MUTEX_INITED 0x002 /* Mutex is successfully initialized */ +#define MUTEX_SELF_BLOCK 0x004 /* Must block self. */ +#define MUTEX_THREAD 0x008 /* Thread-only mutex. */ + +/* Mutex. */ +struct __mutex_t { +#ifdef HAVE_MUTEX_THREADS +#ifdef MUTEX_FIELDS + MUTEX_FIELDS +#else + tsl_t tas; /* Test and set. */ +#endif + u_int32_t spins; /* Spins before block. */ + u_int32_t locked; /* !0 if locked. */ +#else + u_int32_t off; /* Byte offset to lock. */ + u_int32_t pid; /* Lock holder: 0 or process pid. */ +#endif + u_int32_t mutex_set_wait; /* Granted after wait. */ + u_int32_t mutex_set_nowait; /* Granted without waiting. */ +#ifdef MUTEX_SYSTEM_RESOURCES + roff_t reg_off; /* Shared lock info offset. */ +#endif + + u_int8_t flags; /* MUTEX_XXX */ +}; + +/* Redirect calls to the correct functions. */ +#ifdef HAVE_MUTEX_THREADS +#if defined(HAVE_MUTEX_PTHREADS) || defined(HAVE_MUTEX_SOLARIS_LWP) || defined(HAVE_MUTEX_UI_THREADS) +#define __db_mutex_init(a, b, c, d) __db_pthread_mutex_init(a, b, d) +#define __db_mutex_lock(a, b, c) __db_pthread_mutex_lock(a, b) +#define __db_mutex_unlock(a, b) __db_pthread_mutex_unlock(a, b) +#define __db_mutex_destroy(a) __db_pthread_mutex_destroy(a) +#else +#define __db_mutex_init(a, b, c, d) __db_tas_mutex_init(a, b, d) +#define __db_mutex_lock(a, b, c) __db_tas_mutex_lock(a, b) +#define __db_mutex_unlock(a, b) __db_tas_mutex_unlock(a, b) +#define __db_mutex_destroy(a) __db_tas_mutex_destroy(a) +#endif +#else +#define __db_mutex_init(a, b, c, d) __db_fcntl_mutex_init(a, b, c) +#define __db_mutex_lock(a, b, c) __db_fcntl_mutex_lock(a, b, c) +#define __db_mutex_unlock(a, b) __db_fcntl_mutex_unlock(a, b) +#define __db_mutex_destroy(a) __db_fcntl_mutex_destroy(a) +#endif + +/* Redirect system resource calls to correct functions */ +#ifdef MUTEX_SYSTEM_RESOURCES +#define __db_maintinit(a, b, c) __db_shreg_maintinit(a, b, c) +#define __db_shlocks_clear(a, b, c) __db_shreg_locks_clear(a, b, c) +#define __db_shlocks_destroy(a, b) __db_shreg_locks_destroy(a, b) +#define __db_shmutex_init(a, b, c, d, e, f) \ + __db_shreg_mutex_init(a, b, c, d, e, f) +#else +#define __db_maintinit(a, b, c) +#define __db_shlocks_clear(a, b, c) +#define __db_shlocks_destroy(a, b) +#define __db_shmutex_init(a, b, c, d, e, f) __db_mutex_init(a, b, c, d) +#endif + +/* + * Lock/unlock a mutex. If the mutex was marked as uninteresting, the thread + * of control can proceed without it. 
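+ * ("Uninteresting" means the MUTEX_IGNORE flag is set, in which case the
+ * MUTEX_LOCK and MUTEX_UNLOCK macros below are no-ops.)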
+ * + * If the lock is for threads-only, then it was optionally not allocated and + * file handles aren't necessary, as threaded applications aren't supported by + * fcntl(2) locking. + */ +#ifdef DIAGNOSTIC + /* + * XXX + * We want to switch threads as often as possible. Yield every time + * we get a mutex to ensure contention. + */ +#define MUTEX_LOCK(dbenv, mp, fh) \ + if (!F_ISSET((MUTEX *)(mp), MUTEX_IGNORE)) \ + (void)__db_mutex_lock(dbenv, mp, fh); \ + if (DB_GLOBAL(db_pageyield)) \ + __os_yield(NULL, 1); +#else +#define MUTEX_LOCK(dbenv, mp, fh) \ + if (!F_ISSET((MUTEX *)(mp), MUTEX_IGNORE)) \ + (void)__db_mutex_lock(dbenv, mp, fh); +#endif +#define MUTEX_UNLOCK(dbenv, mp) \ + if (!F_ISSET((MUTEX *)(mp), MUTEX_IGNORE)) \ + (void)__db_mutex_unlock(dbenv, mp); +#define MUTEX_THREAD_LOCK(dbenv, mp) \ + if (mp != NULL) \ + MUTEX_LOCK(dbenv, mp, NULL) +#define MUTEX_THREAD_UNLOCK(dbenv, mp) \ + if (mp != NULL) \ + MUTEX_UNLOCK(dbenv, mp) + +/* + * We use a single file descriptor for fcntl(2) locking, and (generally) the + * object's offset in a shared region as the byte that we're locking. So, + * there's a (remote) possibility that two objects might have the same offsets + * such that the locks could conflict, resulting in deadlock. To avoid this + * possibility, we offset the region offset by a small integer value, using a + * different offset for each subsystem's locks. Since all region objects are + * suitably aligned, the offset guarantees that we don't collide with another + * region's objects. + */ +#define DB_FCNTL_OFF_GEN 0 /* Everything else. */ +#define DB_FCNTL_OFF_LOCK 1 /* Lock subsystem offset. */ +#define DB_FCNTL_OFF_MPOOL 2 /* Mpool subsystem offset. */ diff --git a/bdb/include/mutex_ext.h b/bdb/include/mutex_ext.h new file mode 100644 index 00000000000..040a6615eef --- /dev/null +++ b/bdb/include/mutex_ext.h @@ -0,0 +1,31 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _mutex_ext_h_ +#define _mutex_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __db_fcntl_mutex_init __P((DB_ENV *, MUTEX *, u_int32_t)); +int __db_fcntl_mutex_lock __P((DB_ENV *, MUTEX *, DB_FH *)); +int __db_fcntl_mutex_unlock __P((DB_ENV *, MUTEX *)); +int __db_fcntl_mutex_destroy __P((MUTEX *)); +int __db_pthread_mutex_init __P((DB_ENV *, MUTEX *, u_int32_t)); +int __db_pthread_mutex_lock __P((DB_ENV *, MUTEX *)); +int __db_pthread_mutex_unlock __P((DB_ENV *, MUTEX *)); +int __db_pthread_mutex_destroy __P((MUTEX *)); +int __db_tas_mutex_init __P((DB_ENV *, MUTEX *, u_int32_t)); +int __db_tas_mutex_lock __P((DB_ENV *, MUTEX *)); +int __db_tas_mutex_unlock __P((DB_ENV *, MUTEX *)); +int __db_tas_mutex_destroy __P((MUTEX *)); +int __db_mutex_alloc __P((DB_ENV *, REGINFO *, MUTEX **)); +void __db_mutex_free __P((DB_ENV *, REGINFO *, MUTEX *)); +int __db_shreg_locks_record __P((DB_ENV *, MUTEX *, REGINFO *, + REGMAINT *)); +void __db_shreg_locks_clear __P((MUTEX *, REGINFO *, REGMAINT *)); +void __db_shreg_locks_destroy __P((REGINFO *, REGMAINT *)); +int __db_shreg_mutex_init __P((DB_ENV *, MUTEX *, u_int32_t, + u_int32_t, REGINFO *, REGMAINT *)); +void __db_shreg_maintinit __P((REGINFO *, void *addr, size_t)); +#if defined(__cplusplus) +} +#endif +#endif /* _mutex_ext_h_ */ diff --git a/bdb/include/os.h b/bdb/include/os.h new file mode 100644 index 00000000000..b5d469e88fa --- /dev/null +++ b/bdb/include/os.h @@ -0,0 +1,46 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 1998, 1999, 2000 + * Sleepycat Software. 
All rights reserved. + * + * $Id: os.h,v 11.5 2000/10/27 20:32:01 dda Exp $ + */ + +#if defined(__cplusplus) +extern "C" { +#endif +/* + * Filehandle. + */ +struct __fh_t { +#if defined(DB_WIN32) + HANDLE handle; /* Windows/32 file handle. */ +#endif + int fd; /* POSIX file descriptor. */ + + u_int32_t log_size; /* XXX: Log file size. */ + +#define DB_FH_NOSYNC 0x01 /* Handle doesn't need to be sync'd. */ +#define DB_FH_VALID 0x02 /* Handle is valid. */ + u_int8_t flags; +}; + +/* + * We group certain seek/write calls into a single function so that we + * can use pread(2)/pwrite(2) where they're available. + */ +#define DB_IO_READ 1 +#define DB_IO_WRITE 2 +typedef struct __io_t { + DB_FH *fhp; /* I/O file handle. */ + MUTEX *mutexp; /* Mutex to lock. */ + size_t pagesize; /* Page size. */ + db_pgno_t pgno; /* Page number. */ + u_int8_t *buf; /* Buffer. */ + size_t bytes; /* Bytes read/written. */ +} DB_IO; + +#if defined(__cplusplus) +} +#endif diff --git a/bdb/include/os_ext.h b/bdb/include/os_ext.h new file mode 100644 index 00000000000..ae9e3d304f2 --- /dev/null +++ b/bdb/include/os_ext.h @@ -0,0 +1,62 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _os_ext_h_ +#define _os_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __os_abspath __P((const char *)); +int __os_strdup __P((DB_ENV *, const char *, void *)); +int __os_calloc __P((DB_ENV *, size_t, size_t, void *)); +int __os_malloc __P((DB_ENV *, size_t, void *(*)(size_t), void *)); +int __os_realloc __P((DB_ENV *, + size_t, void *(*)(void *, size_t), void *)); +void __os_free __P((void *, size_t)); +void __os_freestr __P((void *)); +void *__ua_memcpy __P((void *, const void *, size_t)); +int __os_dirlist __P((DB_ENV *, const char *, char ***, int *)); +void __os_dirfree __P((char **, int)); +int __os_get_errno __P((void)); +void __os_set_errno __P((int)); +int __os_fileid __P((DB_ENV *, const char *, int, u_int8_t *)); +int __os_finit __P((DB_ENV *, DB_FH *, size_t, int)); +int __os_fpinit __P((DB_ENV *, DB_FH *, db_pgno_t, int, int)); +int __os_fsync __P((DB_ENV *, DB_FH *)); +int __os_openhandle __P((DB_ENV *, const char *, int, int, DB_FH *)); +int __os_closehandle __P((DB_FH *)); +int __os_r_sysattach __P((DB_ENV *, REGINFO *, REGION *)); +int __os_r_sysdetach __P((DB_ENV *, REGINFO *, int)); +int __os_mapfile __P((DB_ENV *, + char *, DB_FH *, size_t, int, void **)); +int __os_unmapfile __P((DB_ENV *, void *, size_t)); +u_int32_t __db_oflags __P((int)); +int __db_omode __P((const char *)); +int __os_open __P((DB_ENV *, const char *, u_int32_t, int, DB_FH *)); +int __os_shmname __P((DB_ENV *, const char *, char **)); +int __os_r_attach __P((DB_ENV *, REGINFO *, REGION *)); +int __os_r_detach __P((DB_ENV *, REGINFO *, int)); +int __os_rename __P((DB_ENV *, const char *, const char *)); +int __os_isroot __P((void)); +char *__db_rpath __P((const char *)); +int __os_io __P((DB_ENV *, DB_IO *, int, size_t *)); +int __os_read __P((DB_ENV *, DB_FH *, void *, size_t, size_t *)); +int __os_write __P((DB_ENV *, DB_FH *, void *, size_t, size_t *)); +int __os_seek __P((DB_ENV *, + DB_FH *, size_t, db_pgno_t, u_int32_t, int, DB_OS_SEEK)); +int __os_sleep __P((DB_ENV *, u_long, u_long)); +int __os_spin __P((void)); +void __os_yield __P((DB_ENV*, u_long)); +int __os_exists __P((const char *, int *)); +int __os_ioinfo __P((DB_ENV *, const char *, + DB_FH *, u_int32_t *, u_int32_t *, u_int32_t *)); +int __os_tmpdir __P((DB_ENV *, u_int32_t)); +int __os_unlink __P((DB_ENV *, const char *)); +int __os_region_unlink 
__P((DB_ENV *, const char *)); +#if defined(DB_WIN32) +int __os_win32_errno __P((void)); +#endif +int __os_fpinit __P((DB_ENV *, DB_FH *, db_pgno_t, int, int)); +int __os_is_winnt __P((void)); +#if defined(__cplusplus) +} +#endif +#endif /* _os_ext_h_ */ diff --git a/bdb/include/os_jump.h b/bdb/include/os_jump.h new file mode 100644 index 00000000000..681ba82d5eb --- /dev/null +++ b/bdb/include/os_jump.h @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: os_jump.h,v 11.3 2000/02/14 02:59:55 bostic Exp $ + */ + +/* Calls which can be replaced by the application. */ +struct __db_jumptab { + int (*j_close) __P((int)); + void (*j_dirfree) __P((char **, int)); + int (*j_dirlist) __P((const char *, char ***, int *)); + int (*j_exists) __P((const char *, int *)); + void (*j_free) __P((void *)); + int (*j_fsync) __P((int)); + int (*j_ioinfo) __P((const char *, + int, u_int32_t *, u_int32_t *, u_int32_t *)); + void *(*j_malloc) __P((size_t)); + int (*j_map) __P((char *, size_t, int, int, void **)); + int (*j_open) __P((const char *, int, ...)); + ssize_t (*j_read) __P((int, void *, size_t)); + void *(*j_realloc) __P((void *, size_t)); + int (*j_rename) __P((const char *, const char *)); + int (*j_seek) __P((int, size_t, db_pgno_t, u_int32_t, int, int)); + int (*j_sleep) __P((u_long, u_long)); + int (*j_unlink) __P((const char *)); + int (*j_unmap) __P((void *, size_t)); + ssize_t (*j_write) __P((int, const void *, size_t)); + int (*j_yield) __P((void)); +}; + +extern struct __db_jumptab __db_jump; diff --git a/bdb/include/qam.h b/bdb/include/qam.h new file mode 100644 index 00000000000..88cd68776a8 --- /dev/null +++ b/bdb/include/qam.h @@ -0,0 +1,150 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: qam.h,v 11.26 2001/01/11 18:19:52 bostic Exp $ + */ + +/* + * QAM data elements: a status field and the data. + */ +typedef struct _qamdata { + u_int8_t flags; /* 00: delete bit. */ +#define QAM_VALID 0x01 +#define QAM_SET 0x02 + u_int8_t data[1]; /* Record. */ +} QAMDATA; + +struct __queue; typedef struct __queue QUEUE; +struct __qcursor; typedef struct __qcursor QUEUE_CURSOR; + +struct __qcursor { + /* struct __dbc_internal */ + __DBC_INTERNAL + + /* Queue private part */ + + /* Per-thread information: queue private. */ + db_recno_t recno; /* Current record number. */ + + u_int32_t flags; +}; + +/* + * The in-memory, per-tree queue data structure. + */ + +typedef struct __mpfarray { + u_int32_t n_extent; /* Number of extents in table. */ + u_int32_t low_extent; /* First extent open. */ + u_int32_t hi_extent; /* Last extent open. */ + struct __qmpf { + int pinref; + DB_MPOOLFILE *mpf; + } *mpfarray; /* Array of open extents. */ +} MPFARRAY; + +struct __queue { + db_pgno_t q_meta; /* Database meta-data page. */ + db_pgno_t q_root; /* Database root page. */ + + int re_pad; /* Fixed-length padding byte. */ + u_int32_t re_len; /* Length for fixed-length records. */ + u_int32_t rec_page; /* records per page */ + u_int32_t page_ext; /* Pages per extent */ + MPFARRAY array1, array2; /* File arrays. */ + DB_MPOOL_FINFO finfo; /* Initialized info struct. */ + DB_PGINFO pginfo; /* Initialized pginfo struct. */ + DBT pgcookie; /* Initialized pgcookie. */ + char *path; /* Space allocated to file pathname. */ + char *name; /* The name of the file. 
*/ + char *dir; /* The dir of the file. */ + int mode; /* Mode to open extents. */ +}; + +/* Format for queue extent names. */ +#define QUEUE_EXTENT "%s/__dbq.%s.%d" + +typedef struct __qam_filelist { + DB_MPOOLFILE *mpf; + u_int32_t id; +} QUEUE_FILELIST; + +/* + * Caculate the page number of a recno + * + * Number of records per page = + * Divide the available space on the page by the record len + header. + * + * Page number for record = + * divide the physical record number by the records per page + * add the root page number + * For now the root page will always be 1, but we might want to change + * in the future (e.g. multiple fixed len queues per file). + * + * Index of record on page = + * physical record number, less the logical pno times records/page + */ +#define CALC_QAM_RECNO_PER_PAGE(dbp) \ + (((dbp)->pgsize - sizeof(QPAGE)) / \ + ALIGN(((QUEUE *)(dbp)->q_internal)->re_len + \ + sizeof(QAMDATA) - SSZA(QAMDATA, data), sizeof(u_int32_t))) + +#define QAM_RECNO_PER_PAGE(dbp) (((QUEUE*)(dbp)->q_internal)->rec_page) + +#define QAM_RECNO_PAGE(dbp, recno) \ + (((QUEUE *)(dbp)->q_internal)->q_root \ + + (((recno) - 1) / QAM_RECNO_PER_PAGE(dbp))) + +#define QAM_RECNO_INDEX(dbp, pgno, recno) \ + (((recno) - 1) - (QAM_RECNO_PER_PAGE(dbp) \ + * (pgno - ((QUEUE *)(dbp)->q_internal)->q_root))) + +#define QAM_GET_RECORD(dbp, page, index) \ + ((QAMDATA *)((u_int8_t *)(page) + \ + sizeof(QPAGE) + (ALIGN(sizeof(QAMDATA) - SSZA(QAMDATA, data) + \ + ((QUEUE *)(dbp)->q_internal)->re_len, sizeof(u_int32_t)) * index))) + +#define QAM_AFTER_CURRENT(meta, recno) \ + ((recno) > (meta)->cur_recno && \ + ((meta)->first_recno <= (meta)->cur_recno || (recno) < (meta)->first_recno)) + +#define QAM_BEFORE_FIRST(meta, recno) \ + ((recno) < (meta)->first_recno && \ + ((meta->first_recno <= (meta)->cur_recno || (recno) > (meta)->cur_recno))) + +#define QAM_NOT_VALID(meta, recno) \ + (recno == RECNO_OOB || \ + QAM_BEFORE_FIRST(meta, recno) || QAM_AFTER_CURRENT(meta, recno)) + +/* + * Log opcodes for the mvptr routine. + */ +#define QAM_SETFIRST 0x01 +#define QAM_SETCUR 0x02 + +/* + * Parameter to __qam_position. + */ +typedef enum { + QAM_READ, + QAM_WRITE, + QAM_CONSUME +} qam_position_mode; + +typedef enum { + QAM_PROBE_GET, + QAM_PROBE_PUT, + QAM_PROBE_MPF +} qam_probe_mode; + +#define __qam_fget(dbp, pgnoaddr, flags, addrp) \ + __qam_fprobe(dbp, *pgnoaddr, addrp, QAM_PROBE_GET, flags) + +#define __qam_fput(dbp, pageno, addrp, flags) \ + __qam_fprobe(dbp, pageno, addrp, QAM_PROBE_PUT, flags) + +#include "qam_auto.h" +#include "qam_ext.h" diff --git a/bdb/include/qam_auto.h b/bdb/include/qam_auto.h new file mode 100644 index 00000000000..8362b2118f4 --- /dev/null +++ b/bdb/include/qam_auto.h @@ -0,0 +1,129 @@ +/* Do not edit: automatically built by gen_rec.awk. 
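+ *
+ * (Editorial note on the record-address arithmetic in qam.h above, using an
+ * assumed 100 records per page and the usual root page of 1, not values
+ * taken from any real database:
+ *
+ *	rec_page = 100, q_root = 1, recno = 250
+ *	QAM_RECNO_PAGE:   1 + (250 - 1) / 100         = page 3
+ *	QAM_RECNO_INDEX:  (250 - 1) - 100 * (3 - 1)   = index 49
+ *
+ * that is, logical record 250 is the fiftieth record, index 49, on page 3.)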
*/ + +#ifndef qam_AUTO_H +#define qam_AUTO_H + +#define DB_qam_inc 76 +typedef struct _qam_inc_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; +} __qam_inc_args; + +int __qam_inc_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *)); +int __qam_inc_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_inc_read __P((DB_ENV *, void *, __qam_inc_args **)); + +#define DB_qam_incfirst 77 +typedef struct _qam_incfirst_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + db_recno_t recno; +} __qam_incfirst_args; + +int __qam_incfirst_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, db_recno_t)); +int __qam_incfirst_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_incfirst_read __P((DB_ENV *, void *, __qam_incfirst_args **)); + +#define DB_qam_mvptr 78 +typedef struct _qam_mvptr_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t fileid; + db_recno_t old_first; + db_recno_t new_first; + db_recno_t old_cur; + db_recno_t new_cur; + DB_LSN metalsn; +} __qam_mvptr_args; + +int __qam_mvptr_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, db_recno_t, db_recno_t, db_recno_t, db_recno_t, DB_LSN *)); +int __qam_mvptr_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_mvptr_read __P((DB_ENV *, void *, __qam_mvptr_args **)); + +#define DB_qam_del 79 +typedef struct _qam_del_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; +} __qam_del_args; + +int __qam_del_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t)); +int __qam_del_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_del_read __P((DB_ENV *, void *, __qam_del_args **)); + +#define DB_qam_add 80 +typedef struct _qam_add_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; + DBT data; + u_int32_t vflag; + DBT olddata; +} __qam_add_args; + +int __qam_add_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t, const DBT *, u_int32_t, const DBT *)); +int __qam_add_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_add_read __P((DB_ENV *, void *, __qam_add_args **)); + +#define DB_qam_delete 81 +typedef struct _qam_delete_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT name; + DB_LSN lsn; +} __qam_delete_args; + +int __qam_delete_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, const DBT *, DB_LSN *)); +int __qam_delete_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delete_read __P((DB_ENV *, void *, __qam_delete_args **)); + +#define DB_qam_rename 82 +typedef struct _qam_rename_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT name; + DBT newname; +} __qam_rename_args; + +int __qam_rename_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, const DBT *, const DBT *)); +int __qam_rename_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_rename_read __P((DB_ENV *, void *, __qam_rename_args **)); + +#define DB_qam_delext 83 +typedef struct _qam_delext_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + int32_t fileid; + DB_LSN lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; + DBT data; +} __qam_delext_args; + +int 
__qam_delext_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t, const DBT *)); +int __qam_delext_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delext_read __P((DB_ENV *, void *, __qam_delext_args **)); +int __qam_init_print __P((DB_ENV *)); +int __qam_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/qam_ext.h b/bdb/include/qam_ext.h new file mode 100644 index 00000000000..f6e95110c0e --- /dev/null +++ b/bdb/include/qam_ext.h @@ -0,0 +1,56 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _qam_ext_h_ +#define _qam_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __qam_position + __P((DBC *, db_recno_t *, qam_position_mode, int *)); +int __qam_pitem + __P((DBC *, QPAGE *, u_int32_t, db_recno_t, DBT *)); +int __qam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +int __qam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __qam_c_dup __P((DBC *, DBC *)); +int __qam_c_init __P((DBC *)); +int __qam_mswap __P((PAGE *)); +int __qam_pgin_out __P((DB_ENV *, db_pgno_t, void *, DBT *)); +int __qam_fprobe __P((DB *, db_pgno_t, void *, qam_probe_mode, int)); +int __qam_fclose __P((DB *, db_pgno_t)); +int __qam_fremove __P((DB *, db_pgno_t)); +int __qam_sync __P((DB *, u_int32_t)); +int __qam_gen_filelist __P(( DB *, QUEUE_FILELIST **)); +int __qam_db_create __P((DB *)); +int __qam_db_close __P((DB *)); +int __db_prqueue __P((DB *, u_int32_t)); +int __qam_remove __P((DB *, const char *, + const char *, DB_LSN *, int (**)(DB *, void*), void **)); +int __qam_rename __P((DB *, + const char *, const char *, const char *)); +int __qam_open __P((DB *, const char *, db_pgno_t, int, u_int32_t)); +int __qam_metachk __P((DB *, const char *, QMETA *)); +int __qam_inc_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_incfirst_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_mvptr_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_del_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delext_recover __P((DB_ENV *, + DBT *, DB_LSN *, db_recops, void *)); +int __qam_add_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_delete_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_rename_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __qam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); +int __qam_31_qammeta __P((DB *, char *, u_int8_t *)); +int __qam_32_qammeta __P((DB *, char *, u_int8_t *)); +int __qam_vrfy_meta __P((DB *, VRFY_DBINFO *, QMETA *, + db_pgno_t, u_int32_t)); +int __qam_vrfy_data __P((DB *, VRFY_DBINFO *, QPAGE *, + db_pgno_t, u_int32_t)); +int __qam_vrfy_structure __P((DB *, VRFY_DBINFO *, u_int32_t)); +#if defined(__cplusplus) +} +#endif +#endif /* _qam_ext_h_ */ diff --git a/bdb/include/queue.h b/bdb/include/queue.h new file mode 100644 index 00000000000..8d4a771add6 --- /dev/null +++ b/bdb/include/queue.h @@ -0,0 +1,319 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +/* + * XXX + * We #undef the queue macros because there are incompatible versions of this + * file and these macros on various systems. What makes the problem worse is + * they are included and/or defined by system include files which we may have + * already loaded into Berkeley DB before getting here. For example, FreeBSD's + * <rpc/rpc.h> includes its system <sys/queue.h>, and VxWorks UnixLib.h defines + * several of the LIST_XXX macros. Make sure we use ours. + */ +#undef LIST_HEAD +#undef LIST_ENTRY +#undef LIST_FIRST +#undef LIST_NEXT +#undef LIST_INIT +#undef LIST_INSERT_AFTER +#undef LIST_INSERT_BEFORE +#undef LIST_INSERT_HEAD +#undef LIST_REMOVE +#undef TAILQ_HEAD +#undef TAILQ_ENTRY +#undef TAILQ_FIRST +#undef TAILQ_NEXT +#undef TAILQ_INIT +#undef TAILQ_INSERT_HEAD +#undef TAILQ_INSERT_TAIL +#undef TAILQ_INSERT_AFTER +#undef TAILQ_INSERT_BEFORE +#undef TAILQ_REMOVE +#undef CIRCLEQ_HEAD +#undef CIRCLEQ_ENTRY +#undef CIRCLEQ_FIRST +#undef CIRCLEQ_LAST +#undef CIRCLEQ_NEXT +#undef CIRCLEQ_PREV +#undef CIRCLEQ_INIT +#undef CIRCLEQ_INSERT_AFTER +#undef CIRCLEQ_INSERT_BEFORE +#undef CIRCLEQ_INSERT_HEAD +#undef CIRCLEQ_INSERT_TAIL +#undef CIRCLEQ_REMOVE + +/* + * This file defines three types of data structures: lists, tail queues, + * and circular queues. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may only be traversed in the forward direction. + * + * A circle queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. 
The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or after + * an existing element, at the head of the list, or at the end of the list. + * A circle queue may be traversed in either direction, but has a more + * complex end of list detection. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * List definitions. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +#define LIST_FIRST(head) ((head)->lh_first) +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +/* + * List functions. + */ +#define LIST_INIT(head) { \ + (head)->lh_first = NULL; \ +} + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} while (0) + +#define LIST_REMOVE(elm, field) do { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} while (0) + +/* + * Tail queue definitions. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ +} + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} + +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +/* + * Tail queue functions. 
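+ *
+ * (Editorial usage sketch; the element type is invented and allocation of
+ * the element is omitted:
+ *
+ *	struct elem { int value; TAILQ_ENTRY(elem) links; };
+ *	TAILQ_HEAD(elemq, elem) head;
+ *	struct elem *ep;
+ *
+ *	TAILQ_INIT(&head);
+ *	TAILQ_INSERT_TAIL(&head, ep, links);
+ *	for (ep = TAILQ_FIRST(&head); ep != NULL;
+ *	    ep = TAILQ_NEXT(ep, links))
+ *		ep->value++;
+ * )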
+ */ +#define TAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_last = &(head)->tqh_first; \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &(elm)->field.tqe_next; \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + (elm)->field.tqe_next = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ +} while (0) + +#define TAILQ_REMOVE(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ +} while (0) + +/* + * This macro is used to fixup the queue after moving the head. + */ +#define TAILQ_REINSERT_HEAD(head, elm, field) do { \ + DB_ASSERT((head)->tqh_first == (elm)); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} while (0) + +/* + * Circular queue definitions. + */ +#define CIRCLEQ_HEAD(name, type) \ +struct name { \ + struct type *cqh_first; /* first element */ \ + struct type *cqh_last; /* last element */ \ +} + +#define CIRCLEQ_ENTRY(type) \ +struct { \ + struct type *cqe_next; /* next element */ \ + struct type *cqe_prev; /* previous element */ \ +} + +#define CIRCLEQ_FIRST(head) ((head)->cqh_first) +#define CIRCLEQ_LAST(head) ((head)->cqh_last) +#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next) +#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev) + +/* + * Circular queue functions. 
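+ *
+ * (Editorial sketch of the end-of-list test mentioned earlier; the element
+ * type is invented.  Because an empty or exhausted circular queue points
+ * back at the head itself, a traversal compares against the head rather
+ * than against NULL:
+ *
+ *	struct elem { int value; CIRCLEQ_ENTRY(elem) links; };
+ *	CIRCLEQ_HEAD(elemq, elem) head;
+ *	struct elem *ep;
+ *
+ *	for (ep = CIRCLEQ_FIRST(&head);
+ *	    ep != (void *)&head; ep = CIRCLEQ_NEXT(ep, links))
+ *		ep->value++;
+ * )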
+ */ +#define CIRCLEQ_INIT(head) do { \ + (head)->cqh_first = (void *)(head); \ + (head)->cqh_last = (void *)(head); \ +} while (0) + +#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm)->field.cqe_next; \ + (elm)->field.cqe_prev = (listelm); \ + if ((listelm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (listelm)->field.cqe_next->field.cqe_prev = (elm); \ + (listelm)->field.cqe_next = (elm); \ +} while (0) + +#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm); \ + (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ + if ((listelm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (listelm)->field.cqe_prev->field.cqe_next = (elm); \ + (listelm)->field.cqe_prev = (elm); \ +} while (0) + +#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.cqe_next = (head)->cqh_first; \ + (elm)->field.cqe_prev = (void *)(head); \ + if ((head)->cqh_last == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (head)->cqh_first->field.cqe_prev = (elm); \ + (head)->cqh_first = (elm); \ +} while (0) + +#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.cqe_next = (void *)(head); \ + (elm)->field.cqe_prev = (head)->cqh_last; \ + if ((head)->cqh_first == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (head)->cqh_last->field.cqe_next = (elm); \ + (head)->cqh_last = (elm); \ +} while (0) + +#define CIRCLEQ_REMOVE(head, elm, field) do { \ + if ((elm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm)->field.cqe_prev; \ + else \ + (elm)->field.cqe_next->field.cqe_prev = \ + (elm)->field.cqe_prev; \ + if ((elm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm)->field.cqe_next; \ + else \ + (elm)->field.cqe_prev->field.cqe_next = \ + (elm)->field.cqe_next; \ +} while (0) + +#if defined(__cplusplus) +} +#endif diff --git a/bdb/include/region.h b/bdb/include/region.h new file mode 100644 index 00000000000..c5882d09aad --- /dev/null +++ b/bdb/include/region.h @@ -0,0 +1,292 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: region.h,v 11.13 2000/11/15 19:25:37 sue Exp $ + */ + +/* + * The DB environment consists of some number of "regions", which are described + * by the following four structures: + * + * REGENV -- shared information about the environment + * REGENV_REF -- file describing system memory version of REGENV + * REGION -- shared information about a single region + * REGINFO -- per-process information about a REGION + * + * There are three types of memory that hold regions: + * per-process heap (malloc) + * file mapped into memory (mmap, MapViewOfFile) + * system memory (shmget, CreateFileMapping) + * + * If the regions are private to a process, they're in malloc. If they're + * public, they're in file mapped memory, or, optionally, in system memory. + * Regions in the filesystem are named "__db.001", "__db.002" and so on. If + * we're not using a private environment allocated using malloc(3), the file + * "__db.001" will always exist, as we use it to synchronize on the regions, + * whether they exist in file mapped memory or system memory. + * + * The file "__db.001" contains a REGENV structure and a linked list of some + * number of REGION structures. Each of the REGION structures describes and + * locks one of the underlying shared regions used by DB. 
+ * + * __db.001 + * +---------+ + * |REGENV | + * +---------+ +----------+ + * |REGION |-> | __db.002 | + * | | +----------+ + * +---------+ +----------+ + * |REGION |-> | __db.003 | + * | | +----------+ + * +---------+ +----------+ + * |REGION |-> | __db.004 | + * | | +----------+ + * +---------+ + * + * The only tricky part about manipulating the regions is correctly creating + * or joining the REGENV file, i.e., __db.001. We have to be absolutely sure + * that only one process creates it, and that everyone else joins it without + * seeing inconsistent data. Once that region is created, we can use normal + * shared locking procedures to do mutal exclusion for all other regions. + * + * One of the REGION structures in the main environment region describes the + * environment region itself. + * + * To lock a region, locate the REGION structure that describes it and acquire + * the region's mutex. There is one exception to this rule -- the lock for the + * environment region itself is in the REGENV structure, and not in the REGION + * that describes the environment region. That's so that we can acquire a lock + * without walking linked lists that could potentially change underneath us. + * The REGION will not be moved or removed during the life of the region, and + * so long-lived references to it can be held by the process. + * + * All requests to create or join a region return a REGINFO structure, which + * is held by the caller and used to open and subsequently close the reference + * to the region. The REGINFO structure contains the per-process information + * that we need to access the region. + * + * The one remaining complication. If the regions (including the environment + * region) live in system memory, and the system memory isn't "named" somehow + * in the filesystem name space, we need some way of finding it. Do this by + * by writing the REGENV_REF structure into the "__db.001" file. When we find + * a __db.001 file that is too small to be a real, on-disk environment, we use + * the information it contains to redirect to the real "__db.001" file/memory. + * This currently only happens when the REGENV file is in shared system memory. + * + * Although DB does not currently grow regions when they run out of memory, it + * would be possible to do so. To grow a region, allocate a new region of the + * appropriate size, then copy the old region over it and insert the additional + * space into the already existing shalloc arena. Callers may have to fix up + * local references, but that should be easy to do. This failed in historic + * versions of DB because the region lock lived in the mapped memory, and when + * it was unmapped and remapped (or copied), threads could lose track of it. + * Once we moved that lock into a region that is never unmapped, growing should + * work. That all said, current versions of DB don't implement region grow + * because some systems don't support mutex copying, e.g., from OSF1 V4.0: + * + * The address of an msemaphore structure may be significant. If the + * msemaphore structure contains any value copied from an msemaphore + * structure at a different address, the result is undefined. + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DB_REGION_FMT "__db.%03d" /* Region file name format. */ +#define DB_REGION_NAME_NUM 5 /* First digit offset in file names. */ +#define DB_REGION_NAME_LENGTH 8 /* Length of file names. */ + +#define DB_REGION_ENV "__db.001" /* Primary environment name. 
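+ *
+ * (Editorial illustration: region file names are built from DB_REGION_FMT,
+ * so, for example, region id 2 is backed by the file "__db.002"; for ids
+ * below 1000 the name is DB_REGION_NAME_LENGTH bytes long and its first
+ * digit is at byte offset DB_REGION_NAME_NUM:
+ *
+ *	char buf[DB_REGION_NAME_LENGTH + 1];
+ *	snprintf(buf, sizeof(buf), DB_REGION_FMT, 2);
+ * )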
*/ + +#define INVALID_REGION_ID 0 /* Out-of-band region ID. */ +#define REGION_ID_ENV 1 /* Primary environment ID. */ + +typedef enum { + INVALID_REGION_TYPE=0, /* Region type. */ + REGION_TYPE_ENV, + REGION_TYPE_LOCK, + REGION_TYPE_LOG, + REGION_TYPE_MPOOL, + REGION_TYPE_MUTEX, + REGION_TYPE_TXN } reg_type; + +#define INVALID_REGION_SEGID -1 /* Segment IDs are either shmget(2) or + * Win16 segment identifiers. They are + * both stored in a "long", and we need + * an out-of-band value. + */ +/* + * Nothing can live at region offset 0, because, in all cases, that's where + * we store *something*. Lots of code needs an out-of-band value for region + * offsets, so we use 0. + */ +#define INVALID_ROFF 0 + +/* Reference describing system memory version of REGENV. */ +typedef struct __db_reg_env_ref { + roff_t size; /* Region size. */ + long segid; /* UNIX shmget(2) ID. */ +} REGENV_REF; + +/* Per-environment region information. */ +typedef struct __db_reg_env { + /* + * !!! + * The mutex must be the first entry in the structure to guarantee + * correct alignment. + */ + MUTEX mutex; /* Environment mutex. */ + + /* + * !!! + * Note, the magic and panic fields are NOT protected by any mutex, + * and for this reason cannot be anything more complicated than a + * zero/non-zero value. + * + * !!! + * The valid region magic number must appear at the same byte offset + * in both the environment and each shared region, as Windows/95 uses + * it to determine if the memory has been zeroed since it was last used. + */ + u_int32_t magic; /* Valid region magic number. */ + + int panic; /* Environment is dead. */ + + int majver; /* Major DB version number. */ + int minver; /* Minor DB version number. */ + int patch; /* Patch DB version number. */ + + u_int32_t init_flags; /* Flags the env was initialized with.*/ + + /* List of regions. */ + SH_LIST_HEAD(__db_regionh) regionq; + + u_int32_t refcnt; /* References to the environment. */ + + size_t pad; /* Guarantee that following memory is + * size_t aligned. This is necessary + * because we're going to store the + * allocation region information there. + */ +} REGENV; + +/* Per-region shared region information. */ +typedef struct __db_region { + /* + * !!! + * The mutex must be the first entry in the structure to guarantee + * correct alignment. + */ + MUTEX mutex; /* Region mutex. */ + + /* + * !!! + * The valid region magic number must appear at the same byte offset + * in both the environment and each shared region, as Windows/95 uses + * it to determine if the memory has been zeroed since it was last used. + */ + u_int32_t magic; + + SH_LIST_ENTRY q; /* Linked list of REGIONs. */ + + reg_type type; /* Region type. */ + u_int32_t id; /* Region id. */ + + roff_t size; /* Region size in bytes. */ + + roff_t primary; /* Primary data structure offset. */ + + long segid; /* UNIX shmget(2), Win16 segment ID. */ +} REGION; + +/* + * Per-process/per-attachment information about a single region. + */ +struct __db_reginfo_t { /* __db_r_attach IN parameters. */ + reg_type type; /* Region type. */ + u_int32_t id; /* Region id. */ + int mode; /* File creation mode. */ + + /* __db_r_attach OUT parameters. */ + REGION *rp; /* Shared region. */ + + char *name; /* Region file name. */ + + void *addr; /* Region allocation address. */ + void *primary; /* Primary data structure address. */ + + void *wnt_handle; /* Win/NT HANDLE. */ + +#define REGION_CREATE 0x01 /* Caller created region. */ +#define REGION_CREATE_OK 0x02 /* Caller willing to create region. 
*/ +#define REGION_JOIN_OK 0x04 /* Caller is looking for a match. */ + u_int32_t flags; +}; + +/* + * Mutex maintenance information each subsystem region must keep track + * of to manage resources adequately. + */ +typedef struct __db_regmaint_stat_t { + u_int32_t st_hint_hit; + u_int32_t st_hint_miss; + u_int32_t st_records; + u_int32_t st_clears; + u_int32_t st_destroys; + u_int32_t st_max_locks; +} REGMAINT_STAT; + +typedef struct __db_regmaint_t { + u_int32_t reglocks; /* Maximum # of mutexes we track. */ + u_int32_t regmutex_hint; /* Hint for next slot */ + REGMAINT_STAT stat; /* Stats */ + roff_t regmutexes[1]; /* Region mutexes in use. */ +} REGMAINT; + +/* + * R_ADDR Return a per-process address for a shared region offset. + * R_OFFSET Return a shared region offset for a per-process address. + * + * !!! + * R_OFFSET should really be returning a ptrdiff_t, but that's not yet + * portable. We use u_int32_t, which restricts regions to 4Gb in size. + */ +#define R_ADDR(base, offset) \ + ((void *)((u_int8_t *)((base)->addr) + offset)) +#define R_OFFSET(base, p) \ + ((u_int32_t)((u_int8_t *)(p) - (u_int8_t *)(base)->addr)) + +/* + * R_LOCK Lock/unlock a region. + * R_UNLOCK + */ +#define R_LOCK(dbenv, reginfo) \ + MUTEX_LOCK(dbenv, &(reginfo)->rp->mutex, (dbenv)->lockfhp) +#define R_UNLOCK(dbenv, reginfo) \ + MUTEX_UNLOCK(dbenv, &(reginfo)->rp->mutex) + +/* PANIC_CHECK: Check to see if the DB environment is dead. */ +#define PANIC_CHECK(dbenv) \ + if (DB_GLOBAL(db_panic) && \ + (dbenv)->reginfo != NULL && ((REGENV *) \ + ((REGINFO *)(dbenv)->reginfo)->primary)->panic != 0) \ + return (DB_RUNRECOVERY); + +/* + * All regions are created on 8K boundaries out of sheer paranoia, so that + * we don't make some underlying VM unhappy. + */ +#define OS_ROUNDOFF(i, s) { \ + (i) += (s) - 1; \ + (i) -= (i) % (s); \ +} +#define OS_VMPAGESIZE (8 * 1024) +#define OS_VMROUNDOFF(i) OS_ROUNDOFF(i, OS_VMPAGESIZE) + +#if defined(__cplusplus) +} +#endif diff --git a/bdb/include/rpc_client_ext.h b/bdb/include/rpc_client_ext.h new file mode 100644 index 00000000000..a5c4689cd27 --- /dev/null +++ b/bdb/include/rpc_client_ext.h @@ -0,0 +1,19 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _rpc_client_ext_h_ +#define _rpc_client_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __dbcl_envserver __P((DB_ENV *, char *, long, long, u_int32_t)); +int __dbcl_refresh __P((DB_ENV *)); +int __dbcl_txn_close __P((DB_ENV *)); +void __dbcl_txn_end __P((DB_TXN *)); +int __dbcl_c_destroy __P((DBC *)); +void __dbcl_c_refresh __P((DBC *)); +int __dbcl_c_setup __P((long, DB *, DBC **)); +int __dbcl_retcopy __P((DB_ENV *, DBT *, void *, u_int32_t)); +int __dbcl_dbclose_common __P((DB *)); +#if defined(__cplusplus) +} +#endif +#endif /* _rpc_client_ext_h_ */ diff --git a/bdb/include/rpc_server_ext.h b/bdb/include/rpc_server_ext.h new file mode 100644 index 00000000000..4abb0768134 --- /dev/null +++ b/bdb/include/rpc_server_ext.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT: automatically built by dist/s_include. 
*/ +#ifndef _rpc_server_ext_h_ +#define _rpc_server_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +void __db_stats_freelist __P((__db_stat_statsreplist **)); +void __dbsrv_settimeout __P((ct_entry *, u_int32_t)); +void __dbsrv_timeout __P((int)); +void __dbclear_ctp __P((ct_entry *)); +void __dbdel_ctp __P((ct_entry *)); +ct_entry *new_ct_ent __P((u_int32_t *)); +ct_entry *get_tableent __P((long)); +void __dbsrv_active __P((ct_entry *)); +int __dbc_close_int __P((ct_entry *)); +int __dbenv_close_int __P((long, int)); +char *get_home __P((char *)); +#if defined(__cplusplus) +} +#endif +#endif /* _rpc_server_ext_h_ */ diff --git a/bdb/include/shqueue.h b/bdb/include/shqueue.h new file mode 100644 index 00000000000..115c5d39e88 --- /dev/null +++ b/bdb/include/shqueue.h @@ -0,0 +1,337 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: shqueue.h,v 11.6 2000/11/14 20:20:28 bostic Exp $ + */ +#ifndef _SYS_SHQUEUE_H_ +#define _SYS_SHQUEUE_H_ + +/* + * This file defines three types of data structures: lists, tail queues, and + * circular queues, similarly to the include file <sys/queue.h>. + * + * The difference is that this set of macros can be used for structures that + * reside in shared memory that may be mapped at different addresses in each + * process. In most cases, the macros for shared structures exactly mirror + * the normal macros, although the macro calls require an additional type + * parameter, only used by the HEAD and ENTRY macros of the standard macros. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Shared list definitions. + */ +#define SH_LIST_HEAD(name) \ +struct name { \ + ssize_t slh_first; /* first element */ \ +} + +#define SH_LIST_ENTRY \ +struct { \ + ssize_t sle_next; /* relative offset next element */ \ + ssize_t sle_prev; /* relative offset of prev element */ \ +} + +/* + * Shared list functions. Since we use relative offsets for pointers, + * 0 is a valid offset. Therefore, we use -1 to indicate end of list. + * The macros ending in "P" return pointers without checking for end + * of list, the others check for end of list and evaluate to either a + * pointer or NULL. + */ + +#define SH_LIST_FIRSTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)) + +#define SH_LIST_FIRST(head, type) \ + ((head)->slh_first == -1 ? NULL : \ + ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))) + +#define SH_LIST_NEXTP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)) + +#define SH_LIST_NEXT(elm, field, type) \ + ((elm)->field.sle_next == -1 ? NULL : \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))) + +#define SH_LIST_PREV(elm, field) \ + ((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev)) + +#define SH_PTR_TO_OFF(src, dest) \ + ((ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src)))) + +/* + * Take the element's next pointer and calculate what the corresponding + * Prev pointer should be -- basically it is the negation plus the offset + * of the next field in the structure. 
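+ *
+ * (Editorial numeric illustration under an assumed layout: suppose element A
+ * lives at region offset 256, element B at offset 384, and the sle_next
+ * field sits 8 bytes into each element.  Linking A before B gives
+ *
+ *	A.sle_next = SH_PTR_TO_OFF(&A, &B)  =  128
+ *	B.sle_prev = -A.sle_next + 8        = -120
+ *
+ * and indeed (256 + 8) - 384 = -120, i.e. B's prev offset leads back to the
+ * ssize_t slot inside A that points at B, which is exactly the slot the
+ * list-remove macro below must update when B goes away.)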
+ */ +#define SH_LIST_NEXT_TO_PREV(elm, field) \ + (-(elm)->field.sle_next + SH_PTR_TO_OFF(elm, &(elm)->field.sle_next)) + +#define SH_LIST_INIT(head) (head)->slh_first = -1 + +#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do { \ + if ((listelm)->field.sle_next != -1) { \ + (elm)->field.sle_next = SH_PTR_TO_OFF(elm, \ + SH_LIST_NEXTP(listelm, field, type)); \ + SH_LIST_NEXTP(listelm, field, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_LIST_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->slh_first != -1) { \ + (elm)->field.sle_next = \ + (head)->slh_first - SH_PTR_TO_OFF(head, elm); \ + SH_LIST_FIRSTP(head, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (head)->slh_first = SH_PTR_TO_OFF(head, elm); \ + (elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \ +} while (0) + +#define SH_LIST_REMOVE(elm, field, type) do { \ + if ((elm)->field.sle_next != -1) { \ + SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \ + (elm)->field.sle_prev - (elm)->field.sle_next; \ + *SH_LIST_PREV(elm, field) += (elm)->field.sle_next; \ + } else \ + *SH_LIST_PREV(elm, field) = -1; \ +} while (0) + +/* + * Shared tail queue definitions. + */ +#define SH_TAILQ_HEAD(name) \ +struct name { \ + ssize_t stqh_first; /* relative offset of first element */ \ + ssize_t stqh_last; /* relative offset of last's next */ \ +} + +#define SH_TAILQ_ENTRY \ +struct { \ + ssize_t stqe_next; /* relative offset of next element */ \ + ssize_t stqe_prev; /* relative offset of prev's next */ \ +} + +/* + * Shared tail queue functions. + */ +#define SH_TAILQ_FIRSTP(head, type) \ + ((struct type *)((u_int8_t *)(head) + (head)->stqh_first)) + +#define SH_TAILQ_FIRST(head, type) \ + ((head)->stqh_first == -1 ? NULL : SH_TAILQ_FIRSTP(head, type)) + +#define SH_TAILQ_NEXTP(elm, field, type) \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next)) + +#define SH_TAILQ_NEXT(elm, field, type) \ + ((elm)->field.stqe_next == -1 ? 
NULL : SH_TAILQ_NEXTP(elm, field, type)) + +#define SH_TAILQ_PREVP(elm, field) \ + ((ssize_t *)((u_int8_t *)(elm) + (elm)->field.stqe_prev)) + +#define SH_TAILQ_LAST(head) \ + ((ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last)) + +#define SH_TAILQ_NEXT_TO_PREV(elm, field) \ + (-(elm)->field.stqe_next + SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next)) + +#define SH_TAILQ_INIT(head) { \ + (head)->stqh_first = -1; \ + (head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first); \ +} + +#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->stqh_first != -1) { \ + (elm)->field.stqe_next = \ + (head)->stqh_first - SH_PTR_TO_OFF(head, elm); \ + SH_TAILQ_FIRSTP(head, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (elm)->field.stqe_next = -1; \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \ + } \ + (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \ + (elm)->field.stqe_prev = \ + SH_PTR_TO_OFF(elm, &(head)->stqh_first); \ +} while (0) + +#define SH_TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.stqe_next = -1; \ + (elm)->field.stqe_prev = \ + -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last; \ + if ((head)->stqh_last == \ + SH_PTR_TO_OFF((head), &(head)->stqh_first)) \ + (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \ + else \ + *SH_TAILQ_LAST(head) = -(head)->stqh_last + \ + SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) + \ + SH_PTR_TO_OFF(head, elm); \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &((elm)->field.stqe_next)); \ +} while (0) + +#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do { \ + if ((listelm)->field.stqe_next != -1) { \ + (elm)->field.stqe_next = (listelm)->field.stqe_next - \ + SH_PTR_TO_OFF(listelm, elm); \ + SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (elm)->field.stqe_next = -1; \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &elm->field.stqe_next); \ + } \ + (listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_TAILQ_REMOVE(head, elm, field, type) do { \ + if ((elm)->field.stqe_next != -1) { \ + SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev = \ + (elm)->field.stqe_prev + \ + SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm, \ + field, type), elm); \ + *SH_TAILQ_PREVP(elm, field) += elm->field.stqe_next; \ + } else { \ + (head)->stqh_last = (elm)->field.stqe_prev + \ + SH_PTR_TO_OFF(head, elm); \ + *SH_TAILQ_PREVP(elm, field) = -1; \ + } \ +} while (0) + +/* + * Shared circular queue definitions. + */ +#define SH_CIRCLEQ_HEAD(name) \ +struct name { \ + ssize_t scqh_first; /* first element */ \ + ssize_t scqh_last; /* last element */ \ +} + +#define SH_CIRCLEQ_ENTRY \ +struct { \ + ssize_t scqe_next; /* next element */ \ + ssize_t scqe_prev; /* previous element */ \ +} + +/* + * Shared circular queue functions. + */ +#define SH_CIRCLEQ_FIRSTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_first)) + +#define SH_CIRCLEQ_FIRST(head, type) \ + ((head)->scqh_first == -1 ? \ + (void *)head : SH_CIRCLEQ_FIRSTP(head, type)) + +#define SH_CIRCLEQ_LASTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_last)) + +#define SH_CIRCLEQ_LAST(head, type) \ + ((head)->scqh_last == -1 ? 
(void *)head : SH_CIRCLEQ_LASTP(head, type)) + +#define SH_CIRCLEQ_NEXTP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_next)) + +#define SH_CIRCLEQ_NEXT(head, elm, field, type) \ + ((elm)->field.scqe_next == SH_PTR_TO_OFF(elm, head) ? \ + (void *)head : SH_CIRCLEQ_NEXTP(elm, field, type)) + +#define SH_CIRCLEQ_PREVP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_prev)) + +#define SH_CIRCLEQ_PREV(head, elm, field, type) \ + ((elm)->field.scqe_prev == SH_PTR_TO_OFF(elm, head) ? \ + (void *)head : SH_CIRCLEQ_PREVP(elm, field, type)) + +#define SH_CIRCLEQ_INIT(head) { \ + (head)->scqh_first = 0; \ + (head)->scqh_last = 0; \ +} + +#define SH_CIRCLEQ_INSERT_AFTER(head, listelm, elm, field, type) do { \ + (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.scqe_next = (listelm)->field.scqe_next + \ + (elm)->field.scqe_prev; \ + if (SH_CIRCLEQ_NEXTP(listelm, field, type) == (void *)head) \ + (head)->scqh_last = SH_PTR_TO_OFF(head, elm); \ + else \ + SH_CIRCLEQ_NEXTP(listelm, \ + field, type)->field.scqe_prev = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_NEXTP(listelm, \ + field, type), elm); \ + (listelm)->field.scqe_next = -(elm)->field.scqe_prev; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \ + (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.scqe_prev = (elm)->field.scqe_next - \ + SH_CIRCLEQ_PREVP(listelm, field, type)->field.scqe_next;\ + if (SH_CIRCLEQ_PREVP(listelm, field, type) == (void *)(head)) \ + (head)->scqh_first = SH_PTR_TO_OFF(head, elm); \ + else \ + SH_CIRCLEQ_PREVP(listelm, \ + field, type)->field.scqe_next = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_PREVP(listelm, \ + field, type), elm); \ + (listelm)->field.scqe_prev = -(elm)->field.scqe_next; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_HEAD(head, elm, field, type) do { \ + (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, head); \ + (elm)->field.scqe_next = (head)->scqh_first + \ + (elm)->field.scqe_prev; \ + if ((head)->scqh_last == 0) \ + (head)->scqh_last = -(elm)->field.scqe_prev; \ + else \ + SH_CIRCLEQ_FIRSTP(head, type)->field.scqe_prev = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_FIRSTP(head, type), elm); \ + (head)->scqh_first = -(elm)->field.scqe_prev; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_TAIL(head, elm, field, type) do { \ + (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, head); \ + (elm)->field.scqe_prev = (head)->scqh_last + \ + (elm)->field.scqe_next; \ + if ((head)->scqh_first == 0) \ + (head)->scqh_first = -(elm)->field.scqe_next; \ + else \ + SH_CIRCLEQ_LASTP(head, type)->field.scqe_next = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_LASTP(head, type), elm); \ + (head)->scqh_last = -(elm)->field.scqe_next; \ +} while (0) + +#define SH_CIRCLEQ_REMOVE(head, elm, field, type) do { \ + if (SH_CIRCLEQ_NEXTP(elm, field, type) == (void *)(head)) \ + (head)->scqh_last += (elm)->field.scqe_prev; \ + else \ + SH_CIRCLEQ_NEXTP(elm, field, type)->field.scqe_prev += \ + (elm)->field.scqe_prev; \ + if (SH_CIRCLEQ_PREVP(elm, field, type) == (void *)(head)) \ + (head)->scqh_first += (elm)->field.scqe_next; \ + else \ + SH_CIRCLEQ_PREVP(elm, field, type)->field.scqe_next += \ + (elm)->field.scqe_next; \ +} while (0) + +#if defined(__cplusplus) +} +#endif + +#endif /* !_SYS_SHQUEUE_H_ */ diff --git a/bdb/include/tcl_db.h b/bdb/include/tcl_db.h new file mode 100644 index 00000000000..254006c2f6d --- /dev/null +++ b/bdb/include/tcl_db.h @@ -0,0 +1,219 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: tcl_db.h,v 11.9 2000/12/12 17:43:56 bostic Exp $ + */ + +#define MSG_SIZE 100 /* Message size */ + +enum INFOTYPE { + I_ENV, I_DB, I_DBC, I_TXN, I_MP, I_PG, I_LOCK, I_NDBM, I_MUTEX }; + +#define MAX_ID 8 /* Maximum number of sub-id's we need */ + +#define DBTCL_DBM 1 +#define DBTCL_NDBM 2 + +typedef struct _mutex_entry { + union { + struct { + MUTEX real_m; + u_int32_t real_val; + } r; + /* + * This is here to make sure that each of the mutex structures + * are 16-byte aligned, which is required on HP architectures. + * The db_mutex_t structure might be >32 bytes itself, or the + * real_val might push it over the 32 byte boundary. The best + * we can do is use a 48 byte boundary. + */ + char c[48]; + } u; +} _MUTEX_ENTRY; + +#define m u.r.real_m +#define val u.r.real_val + +typedef struct _mutex_data { + DB_ENV *env; + REGINFO reginfo; + _MUTEX_ENTRY *marray; + size_t size; + u_int32_t n_mutex; +} _MUTEX_DATA; + +/* + * Why use a home grown package over the Tcl_Hash functions? + * + * We could have implemented the stuff below without maintaining our + * own list manipulation, efficiently hashing it with the available + * Tcl functions (Tcl_CreateHashEntry, Tcl_GetHashValue, etc). I chose + * not to do so for these reasons: + * + * We still need the information below. Using the hashing only removes + * us from needing the next/prev pointers. We still need the structure + * itself because we need more than one value associated with a widget. + * We need to keep track of parent pointers for sub-widgets (like cursors) + * so we can correctly close. We need to keep track of individual widget's + * id counters for any sub-widgets they may have. We need to be able to + * associate the name/client data outside the scope of the widget. + * + * So, is it better to use the hashing rather than + * the linear list we have now? I decided against it for the simple reason + * that to access the structure would require two calls. The first is + * Tcl_FindHashEntry(table, key) and then, once we have the entry, we'd + * have to do Tcl_GetHashValue(entry) to get the pointer of the structure. + * + * I believe the number of simultaneous DB widgets in existence at one time + * is not going to be that large (more than several dozen) such that + * linearly searching the list is not going to impact performance in a + * noticable way. Should performance be impacted due to the size of the + * info list, then perhaps it is time to revisit this decision. 
+ */ +typedef struct dbtcl_info { + LIST_ENTRY(dbtcl_info) entries; + Tcl_Interp *i_interp; + char *i_name; + enum INFOTYPE i_type; + union infop { + DB_ENV *envp; + void *anyp; + DB *dbp; + DBC *dbcp; + DB_TXN *txnp; + DB_MPOOLFILE *mp; + DB_LOCK *lock; + _MUTEX_DATA *mutex; +#if 0 + DBM *ndbmp; /* Compatibility */ +#endif + } un; + union data { + int anydata; + db_pgno_t pgno; + u_int32_t lockid; + } und; + union data2 { + int anydata; + size_t pagesz; + } und2; + DBT i_lockobj; + FILE *i_err; + char *i_errpfx; + struct dbtcl_info *i_parent; + int i_otherid[MAX_ID]; +} DBTCL_INFO; + +extern int __debug_on, __debug_print, __debug_stop, __debug_test; +LIST_HEAD(infohead, dbtcl_info) __db_infohead; + +#define i_anyp un.anyp +#define i_pagep un.anyp +#define i_envp un.envp +#define i_dbp un.dbp +#define i_dbcp un.dbcp +#define i_txnp un.txnp +#define i_mp un.mp +#define i_lock un.lock +#define i_mutex un.mutex +#if 0 +#define i_ndbm un.ndbmp +#endif + +#define i_data und.anydata +#define i_pgno und.pgno +#define i_locker und.lockid +#define i_data2 und2.anydata +#define i_pgsz und2.pagesz + +#define i_envtxnid i_otherid[0] +#define i_envmpid i_otherid[1] +#define i_envlockid i_otherid[2] +#define i_envmutexid i_otherid[3] + +#define i_mppgid i_otherid[0] + +#define i_dbdbcid i_otherid[0] + +#define NAME_TO_ENV(name) (DB_ENV *)_NameToPtr((name)) +#define NAME_TO_DB(name) (DB *)_NameToPtr((name)) +#define NAME_TO_DBC(name) (DBC *)_NameToPtr((name)) +#define NAME_TO_TXN(name) (DB_TXN *)_NameToPtr((name)) +#define NAME_TO_MP(name) (DB_MPOOLFILE *)_NameToPtr((name)) +#define NAME_TO_LOCK(name) (DB_LOCK *)_NameToPtr((name)) + +/* + * MAKE_STAT_LIST appends a {name value} pair to a result list + * that MUST be called 'res' that is a Tcl_Obj * in the local + * function. This macro also assumes a label "error" to go to + * in the even of a Tcl error. For stat functions this will + * typically go before the "free" function to free the stat structure + * returned by DB. + */ +#define MAKE_STAT_LIST(s,v) \ +do { \ + result = _SetListElemInt(interp, res, (s), (v)); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * MAKE_STAT_STRLIST appends a {name string} pair to a result list + * that MUST be called 'res' that is a Tcl_Obj * in the local + * function. This macro also assumes a label "error" to go to + * in the even of a Tcl error. For stat functions this will + * typically go before the "free" function to free the stat structure + * returned by DB. + */ +#define MAKE_STAT_STRLIST(s,s1) \ +do { \ + result = _SetListElem(interp, res, (s), strlen(s), \ + (s1), strlen(s1)); \ + if (result != TCL_OK) \ + goto error; \ +} while (0) + +/* + * FLAG_CHECK checks that the given flag is not set yet. + * If it is, it sets up an error message. + */ +#define FLAG_CHECK(flag) \ +do { \ + if ((flag) != 0) { \ + Tcl_SetResult(interp, \ + " Only 1 policy can be specified.\n", \ + TCL_STATIC); \ + result = TCL_ERROR; \ + break; \ + } \ +} while (0) + +/* + * FLAG_CHECK2 checks that the given flag is not set yet or is + * only set to the given allowed value. + * If it is, it sets up an error message. + */ +#define FLAG_CHECK2(flag,val) \ +do { \ + if ((flag) != 0 && (flag) != (val)) { \ + Tcl_SetResult(interp, \ + " Only 1 policy can be specified.\n", \ + TCL_STATIC); \ + result = TCL_ERROR; \ + break; \ + } \ +} while (0) + +/* + * IS_HELP checks whether the arg we bombed on is -?, which is a help option. 
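 *
 * [Editorial aside on the MAKE_STAT_LIST/MAKE_STAT_STRLIST macros above,
 * before this comment continues: a typical caller is one of the *_stat
 * Tcl commands, which builds its result roughly as follows.  This is a
 * sketch only -- the field names come from DB_TXN_STAT and the real code
 * lives in the tcl_*.c files, not in this header:
 *
 *	Tcl_Obj *res;
 *	int result;
 *	DB_TXN_STAT *sp;
 *	...
 *	res = Tcl_NewObj();
 *	MAKE_STAT_LIST("Number txns begun", sp->st_nbegins);
 *	MAKE_STAT_LIST("Number txns committed", sp->st_ncommits);
 *	Tcl_SetObjResult(interp, res);
 * error:
 *	__os_free(sp, sizeof(*sp));
 *
 * Both macros depend on those exact local names ('res', 'result',
 * 'interp') and on the 'error' label being present.]
 *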
+ * If it is, we return TCL_OK (but leave the result set to whatever + * Tcl_GetIndexFromObj says, which lists all the valid options. Otherwise + * return TCL_ERROR. + */ +#define IS_HELP(s) \ + (strcmp(Tcl_GetStringFromObj(s,NULL), "-?") == 0) ? TCL_OK : TCL_ERROR + +#include "tcl_ext.h" diff --git a/bdb/include/tcl_ext.h b/bdb/include/tcl_ext.h new file mode 100644 index 00000000000..9baf7e4fdcf --- /dev/null +++ b/bdb/include/tcl_ext.h @@ -0,0 +1,89 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _tcl_ext_h_ +#define _tcl_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int bdb_HCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +#if DB_DBM_HSEARCH != 0 +int bdb_NdbmOpen __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DBM **)); +#endif +#if DB_DBM_HSEARCH != 0 +int bdb_DbmCommand + __P((Tcl_Interp *, int, Tcl_Obj * CONST*, int, DBM *)); +#endif +int ndbm_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int bdb_RandCommand __P((Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_Mutex __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, + DBTCL_INFO *)); +int db_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int dbc_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int env_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_EnvRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*, + DB_ENV *, DBTCL_INFO *)); +int tcl_EnvVerbose __P((Tcl_Interp *, DB_ENV *, Tcl_Obj *, + Tcl_Obj *)); +int tcl_EnvTest __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +DBTCL_INFO *_NewInfo __P((Tcl_Interp *, + void *, char *, enum INFOTYPE)); +void *_NameToPtr __P((CONST char *)); +char *_PtrToName __P((CONST void *)); +DBTCL_INFO *_PtrToInfo __P((CONST void *)); +DBTCL_INFO *_NameToInfo __P((CONST char *)); +void _SetInfoData __P((DBTCL_INFO *, void *)); +void _DeleteInfo __P((DBTCL_INFO *)); +int _SetListElem __P((Tcl_Interp *, + Tcl_Obj *, void *, int, void *, int)); +int _SetListElemInt __P((Tcl_Interp *, Tcl_Obj *, void *, int)); +int _SetListRecnoElem __P((Tcl_Interp *, Tcl_Obj *, + db_recno_t, u_char *, int)); +int _GetGlobPrefix __P((char *, char **)); +int _ReturnSetup __P((Tcl_Interp *, int, char *)); +int _ErrorSetup __P((Tcl_Interp *, int, char *)); +void _ErrorFunc __P((CONST char *, char *)); +int _GetLsn __P((Tcl_Interp *, Tcl_Obj *, DB_LSN *)); +void _debug_check __P((void)); +int tcl_LockDetect __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockGet __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockStat __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LockVec __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogArchive __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogCompare __P((Tcl_Interp *, int, + Tcl_Obj * CONST*)); +int tcl_LogFile __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogFlush __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogGet __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogPut __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogRegister __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogStat __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_LogUnregister __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +void _MpInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); +int tcl_MpSync __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +int tcl_MpTrickle __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_Mp 
__P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int tcl_MpStat __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); +void _TxnInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); +int tcl_TxnCheckpoint __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int tcl_Txn __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); +int tcl_TxnStat __P((Tcl_Interp *, int, + Tcl_Obj * CONST*, DB_ENV *)); +int txn_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +#if defined(__cplusplus) +} +#endif +#endif /* _tcl_ext_h_ */ diff --git a/bdb/include/txn.h b/bdb/include/txn.h new file mode 100644 index 00000000000..009a1ca1589 --- /dev/null +++ b/bdb/include/txn.h @@ -0,0 +1,150 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: txn.h,v 11.12 2001/01/02 17:23:39 margo Exp $ + */ + +#ifndef _TXN_H_ +#define _TXN_H_ + +#include "xa.h" + +struct __db_txnmgr; typedef struct __db_txnmgr DB_TXNMGR; +struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION; + +/* + * !!! + * TXN_MINIMUM = (DB_LOCK_MAXID + 1) but this makes compilers complain. + */ +#define TXN_MINIMUM 0x80000000 +#define TXN_INVALID 0xffffffff /* Maximum number of txn ids. */ +#define TXN_INVALID_ID 0 /* Invalid transaction ID. */ + +#define DEF_MAX_TXNS 20 /* Default max transactions. */ + +/* The structure allocated for every transaction. */ +struct __db_txn { + DB_TXNMGR *mgrp; /* Pointer to transaction manager. */ + DB_TXN *parent; /* Pointer to transaction's parent. */ + DB_LSN last_lsn; /* Lsn of last log write. */ + u_int32_t txnid; /* Unique transaction id. */ + roff_t off; /* Detail structure within region. */ + TAILQ_ENTRY(__db_txn) links; /* Links transactions off manager. */ + TAILQ_HEAD(__kids, __db_txn) kids; /* Child transactions. */ + TAILQ_ENTRY(__db_txn) klinks; /* Links child transactions. */ + u_int32_t cursors; /* Number of cursors open for txn */ + +#define TXN_CHILDCOMMIT 0x01 /* Transaction that has committed. */ +#define TXN_MALLOC 0x02 /* Structure allocated by TXN system. */ +#define TXN_NOSYNC 0x04 /* Do not sync on prepare and commit. */ +#define TXN_NOWAIT 0x08 /* Do not wait on locks. */ +#define TXN_SYNC 0x10 /* Sync on prepare and commit. */ + u_int32_t flags; +}; + +/* + * Internal data maintained in shared memory for each transaction. + */ +typedef char DB_XID[XIDDATASIZE]; + +typedef struct __txn_detail { + u_int32_t txnid; /* current transaction id + used to link free list also */ + DB_LSN last_lsn; /* last lsn written for this txn */ + DB_LSN begin_lsn; /* lsn of begin record */ + roff_t parent; /* Offset of transaction's parent. */ + +#define TXN_RUNNING 1 +#define TXN_ABORTED 2 +#define TXN_PREPARED 3 +#define TXN_COMMITTED 4 + u_int32_t status; /* status of the transaction */ + + SH_TAILQ_ENTRY links; /* free/active list */ + +#define TXN_XA_ABORTED 1 +#define TXN_XA_DEADLOCKED 2 +#define TXN_XA_ENDED 3 +#define TXN_XA_PREPARED 4 +#define TXN_XA_STARTED 5 +#define TXN_XA_SUSPENDED 6 + u_int32_t xa_status; /* XA status */ + + /* + * XID (xid_t) structure: because these fields are logged, the + * sizes have to be explicit. + */ + DB_XID xid; /* XA global transaction id */ + u_int32_t bqual; /* bqual_length from XID */ + u_int32_t gtrid; /* gtrid_length from XID */ + int32_t format; /* XA format */ +} TXN_DETAIL; + +/* + * DB_TXNMGR -- + * The transaction manager encapsulates the transaction system. 
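 *
 * [Editorial note: the DB_TXN handle above is private to the opening
 * process, while its TXN_DETAIL lives in the shared transaction region;
 * the roff_t 'off' field ties the two together.  A sketch of the usual
 * translation, assuming the R_ADDR() region macro from region.h and the
 * manager's 'reginfo' field declared below:
 *
 *	TXN_DETAIL *td;
 *
 *	td = (TXN_DETAIL *)R_ADDR(&txn->mgrp->reginfo, txn->off);
 *	if (td->status == TXN_RUNNING)
 *		...
 *
 * Any process attached to the region can inspect a transaction's status
 * this way, even though only the owner holds the DB_TXN itself.]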
+ */ +struct __db_txnmgr { +/* + * These fields need to be protected for multi-threaded support. + * + * !!! + * As this structure is allocated in per-process memory, the mutex may need + * to be stored elsewhere on architectures unable to support mutexes in heap + * memory, e.g., HP/UX 9. + */ + MUTEX *mutexp; /* Lock list of active transactions + * (including the content of each + * TXN_DETAIL structure on the list). + */ + /* List of active transactions. */ + TAILQ_HEAD(_chain, __db_txn) txn_chain; + +/* These fields are never updated after creation, and so not protected. */ + DB_ENV *dbenv; /* Environment. */ + REGINFO reginfo; /* Region information. */ +}; + +/* + * DB_TXNREGION -- + * The primary transaction data structure in the shared memory region. + */ +struct __db_txnregion { + u_int32_t maxtxns; /* maximum number of active TXNs */ + u_int32_t last_txnid; /* last transaction id given out */ + DB_LSN pending_ckp; /* last checkpoint did not finish */ + DB_LSN last_ckp; /* lsn of the last checkpoint */ + time_t time_ckp; /* time of last checkpoint */ + u_int32_t logtype; /* type of logging */ + u_int32_t locktype; /* lock type */ + u_int32_t naborts; /* number of aborted TXNs */ + u_int32_t ncommits; /* number of committed TXNs */ + u_int32_t nbegins; /* number of begun TXNs */ + u_int32_t nactive; /* number of active TXNs */ + u_int32_t maxnactive; /* maximum number of active TXNs */ + /* active TXN list */ + SH_TAILQ_HEAD(__active) active_txn; +}; + +/* + * Make the region large enough to hold N transaction detail structures + * plus some space to hold thread handles and the beginning of the shalloc + * region. + */ +#define TXN_REGION_SIZE(N) \ + (sizeof(DB_TXNREGION) + N * sizeof(TXN_DETAIL) + 1000) + +/* + * Log record types. + */ +#define TXN_COMMIT 1 +#define TXN_PREPARE 2 + +#include "txn_auto.h" +#include "txn_ext.h" + +#include "xa_ext.h" +#endif /* !_TXN_H_ */ diff --git a/bdb/include/txn_auto.h b/bdb/include/txn_auto.h new file mode 100644 index 00000000000..c9cb5cfae4c --- /dev/null +++ b/bdb/include/txn_auto.h @@ -0,0 +1,114 @@ +/* Do not edit: automatically built by gen_rec.awk. 
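 *
 * [Editorial note on the generated declarations in this file: every log
 * record type pairs an _args structure with _log (write), _read
 * (unmarshal) and _print (dump) routines.  The hand-written recovery
 * code consumes them roughly as below -- a sketch, with 'dbtp' standing
 * for the log-record DBT handed to the recovery function and assuming
 * this release's two-argument __os_free(); the real routine is
 * __txn_regop_recover() in txn/txn_rec.c:
 *
 *	__txn_regop_args *argp;
 *	int ret;
 *
 *	if ((ret = __txn_regop_read(dbenv, dbtp->data, &argp)) != 0)
 *		return (ret);
 *	if (argp->opcode == TXN_COMMIT)
 *		... redo or undo using argp->prev_lsn ...
 *	__os_free(argp, 0);
 *
 * so recovery logic never parses raw log bytes directly.]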
*/ + +#ifndef txn_AUTO_H +#define txn_AUTO_H + +#define DB_txn_old_regop 6 +typedef struct _txn_old_regop_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; +} __txn_old_regop_args; + +int __txn_old_regop_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_old_regop_read __P((DB_ENV *, void *, __txn_old_regop_args **)); + +#define DB_txn_regop 10 +typedef struct _txn_regop_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + int32_t timestamp; +} __txn_regop_args; + +int __txn_regop_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t)); +int __txn_regop_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_regop_read __P((DB_ENV *, void *, __txn_regop_args **)); + +#define DB_txn_old_ckp 7 +typedef struct _txn_old_ckp_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DB_LSN ckp_lsn; + DB_LSN last_ckp; +} __txn_old_ckp_args; + +int __txn_old_ckp_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_old_ckp_read __P((DB_ENV *, void *, __txn_old_ckp_args **)); + +#define DB_txn_ckp 11 +typedef struct _txn_ckp_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DB_LSN ckp_lsn; + DB_LSN last_ckp; + int32_t timestamp; +} __txn_ckp_args; + +int __txn_ckp_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, DB_LSN *, DB_LSN *, int32_t)); +int __txn_ckp_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_read __P((DB_ENV *, void *, __txn_ckp_args **)); + +#define DB_txn_xa_regop_old 8 +typedef struct _txn_xa_regop_old_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT xid; + int32_t formatID; + u_int32_t gtrid; + u_int32_t bqual; +} __txn_xa_regop_old_args; + +int __txn_xa_regop_old_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_xa_regop_old_read __P((DB_ENV *, void *, __txn_xa_regop_old_args **)); + +#define DB_txn_xa_regop 13 +typedef struct _txn_xa_regop_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + DBT xid; + int32_t formatID; + u_int32_t gtrid; + u_int32_t bqual; + DB_LSN begin_lsn; +} __txn_xa_regop_args; + +int __txn_xa_regop_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, const DBT *, int32_t, u_int32_t, u_int32_t, DB_LSN *)); +int __txn_xa_regop_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_xa_regop_read __P((DB_ENV *, void *, __txn_xa_regop_args **)); + +#define DB_txn_child_old 9 +typedef struct _txn_child_old_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t parent; +} __txn_child_old_args; + +int __txn_child_old_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_old_read __P((DB_ENV *, void *, __txn_child_old_args **)); + +#define DB_txn_child 12 +typedef struct _txn_child_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t child; + DB_LSN c_lsn; +} __txn_child_args; + +int __txn_child_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, DB_LSN *)); +int __txn_child_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_read __P((DB_ENV *, void *, __txn_child_args **)); +int __txn_init_print __P((DB_ENV *)); +int __txn_init_recover __P((DB_ENV *)); +#endif diff --git a/bdb/include/txn_ext.h b/bdb/include/txn_ext.h new file mode 100644 index 00000000000..ee6922d701c --- /dev/null +++ b/bdb/include/txn_ext.h @@ -0,0 +1,24 @@ +/* DO NOT EDIT: automatically built by 
dist/s_include. */ +#ifndef _txn_ext_h_ +#define _txn_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __txn_xa_begin __P((DB_ENV *, DB_TXN *)); +int __txn_end __P((DB_TXN *, int)); +int __txn_activekids __P((DB_ENV *, u_int32_t, DB_TXN *)); +int __txn_regop_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_xa_regop_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_ckp_recover +__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __txn_child_recover + __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +void __txn_dbenv_create __P((DB_ENV *)); +int __txn_open __P((DB_ENV *)); +int __txn_close __P((DB_ENV *)); +#if defined(__cplusplus) +} +#endif +#endif /* _txn_ext_h_ */ diff --git a/bdb/include/xa.h b/bdb/include/xa.h new file mode 100644 index 00000000000..ce46179263a --- /dev/null +++ b/bdb/include/xa.h @@ -0,0 +1,179 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + * + * $Id: xa.h,v 11.3 2000/02/14 02:59:55 bostic Exp $ + */ +/* + * Start of xa.h header + * + * Define a symbol to prevent multiple inclusions of this header file + */ +#ifndef XA_H +#define XA_H + +/* + * Transaction branch identification: XID and NULLXID: + */ +#define XIDDATASIZE 128 /* size in bytes */ +#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ +#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ + +struct xid_t { + long formatID; /* format identifier */ + long gtrid_length; /* value from 1 through 64 */ + long bqual_length; /* value from 1 through 64 */ + char data[XIDDATASIZE]; +}; +typedef struct xid_t XID; +/* + * A value of -1 in formatID means that the XID is null. + */ + +/* + * Declarations of routines by which RMs call TMs: + */ +extern int ax_reg __P((int, XID *, long)); +extern int ax_unreg __P((int, long)); + +/* + * XA Switch Data Structure + */ +#define RMNAMESZ 32 /* length of resource manager name, */ + /* including the null terminator */ +#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */ + /* strings, including the null + terminator */ +struct xa_switch_t { + char name[RMNAMESZ]; /* name of resource manager */ + long flags; /* resource manager specific options */ + long version; /* must be 0 */ + int (*xa_open_entry) /* xa_open function pointer */ + __P((char *, int, long)); + int (*xa_close_entry) /* xa_close function pointer */ + __P((char *, int, long)); + int (*xa_start_entry) /* xa_start function pointer */ + __P((XID *, int, long)); + int (*xa_end_entry) /* xa_end function pointer */ + __P((XID *, int, long)); + int (*xa_rollback_entry) /* xa_rollback function pointer */ + __P((XID *, int, long)); + int (*xa_prepare_entry) /* xa_prepare function pointer */ + __P((XID *, int, long)); + int (*xa_commit_entry) /* xa_commit function pointer */ + __P((XID *, int, long)); + int (*xa_recover_entry) /* xa_recover function pointer */ + __P((XID *, long, int, long)); + int (*xa_forget_entry) /* xa_forget function pointer */ + __P((XID *, int, long)); + int (*xa_complete_entry) /* xa_complete function pointer */ + __P((int *, int *, int, long)); +}; + +/* + * Flag definitions for the RM switch + */ +#define TMNOFLAGS 0x00000000L /* no resource manager features + selected */ +#define TMREGISTER 0x00000001L /* resource manager dynamically + registers */ +#define TMNOMIGRATE 0x00000002L /* resource manager does not support + association migration */ +#define TMUSEASYNC 0x00000004L /* resource manager 
supports + asynchronous operations */ +/* + * Flag definitions for xa_ and ax_ routines + */ +/* use TMNOFLAGGS, defined above, when not specifying other flags */ +#define TMASYNC 0x80000000L /* perform routine asynchronously */ +#define TMONEPHASE 0x40000000L /* caller is using one-phase commit + optimisation */ +#define TMFAIL 0x20000000L /* dissociates caller and marks + transaction branch rollback-only */ +#define TMNOWAIT 0x10000000L /* return if blocking condition + exists */ +#define TMRESUME 0x08000000L /* caller is resuming association with + suspended transaction branch */ +#define TMSUCCESS 0x04000000L /* dissociate caller from transaction + branch */ +#define TMSUSPEND 0x02000000L /* caller is suspending, not ending, + association */ +#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */ +#define TMENDRSCAN 0x00800000L /* end a recovery scan */ +#define TMMULTIPLE 0x00400000L /* wait for any asynchronous + operation */ +#define TMJOIN 0x00200000L /* caller is joining existing + transaction branch */ +#define TMMIGRATE 0x00100000L /* caller intends to perform + migration */ + +/* + * ax_() return codes (transaction manager reports to resource manager) + */ +#define TM_JOIN 2 /* caller is joining existing + transaction branch */ +#define TM_RESUME 1 /* caller is resuming association with + suspended transaction branch */ +#define TM_OK 0 /* normal execution */ +#define TMER_TMERR -1 /* an error occurred in the transaction + manager */ +#define TMER_INVAL -2 /* invalid arguments were given */ +#define TMER_PROTO -3 /* routine invoked in an improper + context */ + +/* + * xa_() return codes (resource manager reports to transaction manager) + */ +#define XA_RBBASE 100 /* The inclusive lower bound of the + rollback codes */ +#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an + unspecified reason */ +#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a + communication failure */ +#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */ +#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the + integrity of the resources was + detected */ +#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the + transaction branch for a reason not + on this list */ +#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the + resource manager */ +#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took too long */ +#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */ +#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the + rollback codes */ +#define XA_NOMIGRATE 9 /* resumption must occur where + suspension occurred */ +#define XA_HEURHAZ 8 /* the transaction branch may have + been heuristically completed */ +#define XA_HEURCOM 7 /* the transaction branch has been + heuristically committed */ +#define XA_HEURRB 6 /* the transaction branch has been + heuristically rolled back */ +#define XA_HEURMIX 5 /* the transaction branch has been + heuristically committed and rolled + back */ +#define XA_RETRY 4 /* routine returned with no effect and + may be re-issued */ +#define XA_RDONLY 3 /* the transaction branch was read-only + and has been committed */ +#define XA_OK 0 /* normal execution */ +#define XAER_ASYNC -2 /* asynchronous operation already + outstanding */ +#define XAER_RMERR -3 /* a resource manager error occurred in + the transaction branch */ +#define XAER_NOTA -4 /* the XID is not valid */ +#define XAER_INVAL -5 /* invalid arguments were given */ +#define XAER_PROTO -6 /* routine 
invoked in an improper + context */ +#define XAER_RMFAIL -7 /* resource manager unavailable */ +#define XAER_DUPID -8 /* the XID already exists */ +#define XAER_OUTSIDE -9 /* resource manager doing work outside + transaction */ +#endif /* ifndef XA_H */ +/* + * End of xa.h header + */ diff --git a/bdb/include/xa_ext.h b/bdb/include/xa_ext.h new file mode 100644 index 00000000000..cc16ba18337 --- /dev/null +++ b/bdb/include/xa_ext.h @@ -0,0 +1,17 @@ +/* DO NOT EDIT: automatically built by dist/s_include. */ +#ifndef _xa_ext_h_ +#define _xa_ext_h_ +#if defined(__cplusplus) +extern "C" { +#endif +int __db_xa_create __P((DB *)); +int __db_rmid_to_env __P((int rmid, DB_ENV **envp)); +int __db_xid_to_txn __P((DB_ENV *, XID *, size_t *)); +int __db_map_rmid __P((int, DB_ENV *)); +int __db_unmap_rmid __P((int)); +int __db_map_xid __P((DB_ENV *, XID *, size_t)); +void __db_unmap_xid __P((DB_ENV *, XID *, size_t)); +#if defined(__cplusplus) +} +#endif +#endif /* _xa_ext_h_ */ |
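Editorial postscript: the xa_switch_t structure declared in xa.h above is the dispatch table a TP monitor uses to drive a resource manager. The sketch below shows how such a table is laid out; the example_* entry points and the "Example RM" name are hypothetical stand-ins (Berkeley DB builds its own switch elsewhere in the tree, outside these headers), and each stub simply reports success so the shape of the structure stays visible.

#include "db_int.h"	/* assumption: the tree's configured db_int.h supplies __P and basic types */
#include "xa.h"

static int example_open(char *info, int rmid, long flags)     { return (XA_OK); }
static int example_close(char *info, int rmid, long flags)    { return (XA_OK); }
static int example_start(XID *xid, int rmid, long flags)      { return (XA_OK); }
static int example_end(XID *xid, int rmid, long flags)        { return (XA_OK); }
static int example_rollback(XID *xid, int rmid, long flags)   { return (XA_OK); }
static int example_prepare(XID *xid, int rmid, long flags)    { return (XA_RDONLY); }
static int example_commit(XID *xid, int rmid, long flags)     { return (XA_OK); }
static int example_recover(XID *xids, long count, int rmid, long flags)
							       { return (0); }
static int example_forget(XID *xid, int rmid, long flags)     { return (XA_OK); }
static int example_complete(int *handle, int *retval, int rmid, long flags)
							       { return (XA_OK); }

const struct xa_switch_t example_xa_switch = {
	"Example RM",		/* name: fits in RMNAMESZ */
	TMNOMIGRATE,		/* flags: no association migration */
	0,			/* version: must be 0 */
	example_open, example_close,
	example_start, example_end, example_rollback,
	example_prepare, example_commit,
	example_recover, example_forget, example_complete
};

The transaction manager locates such a table by name at link time; the TM* flag and XA_/XAER_ return-code definitions above are the vocabulary its entry points speak.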