summaryrefslogtreecommitdiff
path: root/db2/hash/hash_dup.c
diff options
context:
space:
mode:
Diffstat (limited to 'db2/hash/hash_dup.c')
-rw-r--r--db2/hash/hash_dup.c544
1 files changed, 544 insertions, 0 deletions
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c
new file mode 100644
index 0000000000..059eec6f92
--- /dev/null
+++ b/db2/hash/hash_dup.c
@@ -0,0 +1,544 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_dup.c 10.5 (Sleepycat) 7/27/97";
+#endif /* not lint */
+
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Manipulation of duplicates for the hash package.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __add_dup
+ * Internal
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
+static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
+static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
+
+/*
+ * Called from hash_access to add a duplicate key. nval is the new
+ * value that we want to add. The flags correspond to the flag values
+ * to cursor_put indicating where to add the new element.
+ * There are 4 cases.
+ * Case 1: The existing duplicate set already resides on a separate page.
+ * We can use common code for this.
+ * Case 2: The element is small enough to just be added to the existing set.
+ * Case 3: The element is large enough to be a big item, so we're going to
+ * have to push the set onto a new page.
+ * Case 4: The element is large enough to push the duplicate set onto a
+ * separate page.
+ *
+ * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+ */
+int
+__ham_add_dup(hashp, hcp, nval, flags)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT *nval;
+ int flags;
+{
+ DBT pval, tmp_val;
+ HKEYDATA *hk;
+ u_int32_t del_len, new_size;
+ int ret;
+
+ if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
+ del_len = hcp->dup_len;
+ else
+ del_len = 0;
+
+ if ((ret = __ham_check_move(hashp, hcp,
+ (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
+ return (ret);
+
+ /*
+ * Check if resulting duplicate set is going to need to go
+ * onto a separate duplicate page. If so, convert the
+ * duplicate set and add the new one. After conversion,
+ * hcp->dndx is the first free ndx or the index of the
+ * current pointer into the duplicate set.
+ */
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
+ hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+
+ /*
+ * We convert to off-page duplicates if the item is a big item,
+ * the addition of the new item will make the set large, or
+ * if there isn't enough room on this page to add the next item.
+ */
+ if (hk->type != H_OFFDUP &&
+ (hk->type == H_OFFPAGE || ISBIG(hashp, new_size) ||
+ DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {
+
+ if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
+ return (ret);
+ else
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ }
+
+ /* There are two separate cases here: on page and off page. */
+ if (hk->type != H_OFFDUP) {
+ if (hk->type != H_DUPLICATE) {
+ hk->type = H_DUPLICATE;
+ pval.flags = 0;
+ pval.data = hk->data;
+ pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
+ hcp->bndx);
+ if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
+ &hcp->big_datalen)) != 0 ||
+ (ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
+ return (ret);
+ }
+
+ /* Now make the new entry a duplicate. */
+ if ((ret = __ham_make_dup(nval,
+ &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
+ return (ret);
+
+ tmp_val.dlen = 0;
+ switch (flags) { /* On page. */
+ case DB_KEYFIRST:
+ tmp_val.doff = 0;
+ break;
+ case DB_KEYLAST:
+ tmp_val.doff = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ break;
+ case DB_CURRENT:
+ tmp_val.doff = hcp->dup_off;
+ tmp_val.dlen = DUP_SIZE(hcp->dup_len);
+ break;
+ case DB_BEFORE:
+ tmp_val.doff = hcp->dup_off;
+ break;
+ case DB_AFTER:
+ tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
+ break;
+ }
+ /* Add the duplicate. */
+ ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
+ if (ret == 0)
+ ret = __ham_dirty_page(hashp, hcp->pagep);
+ __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1);
+ return (ret);
+ }
+
+ /* If we get here, then we're on duplicate pages. */
+ if (hcp->dpgno == PGNO_INVALID) {
+ memcpy(&hcp->dpgno,
+ (u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
+ hcp->dndx = 0;
+ }
+
+ switch (flags) {
+ case DB_KEYFIRST:
+ /*
+ * The only way that we are already on a dup page is
+ * if we just converted the on-page representation.
+ * In that case, we've only got one page of duplicates.
+ */
+ if (hcp->dpagep == NULL && (ret =
+ __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dndx = 0;
+ break;
+ case DB_KEYLAST:
+ if (hcp->dpagep == NULL && (ret =
+ __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dpgno = PGNO(hcp->dpagep);
+ hcp->dndx = NUM_ENT(hcp->dpagep);
+ break;
+ case DB_CURRENT:
+ if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
+ BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
+ != 0)
+ return (ret);
+ break;
+ case DB_BEFORE: /* The default behavior is correct. */
+ break;
+ case DB_AFTER:
+ hcp->dndx++;
+ break;
+ }
+
+ ret = __db_dput(hashp->dbp,
+ nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
+ hcp->pgno = PGNO(hcp->pagep);
+ __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1);
+ return (ret);
+}
+
+/*
+ * Convert an on-page set of duplicates to an offpage set of duplicates.
+ */
+static int
+__ham_dup_convert(hashp, hcp)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+{
+ BOVERFLOW bo;
+ DBT dbt;
+ HOFFPAGE ho;
+ db_indx_t dndx, len;
+ int ret;
+ u_int8_t *p, *pend;
+
+ /*
+ * Create a new page for the duplicates.
+ */
+ if ((ret =
+ __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dpagep->type = P_DUPLICATE;
+ hcp->dpgno = PGNO(hcp->dpagep);
+
+ /*
+ * Now put the duplicates onto the new page.
+ */
+ dbt.flags = 0;
+ switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) {
+ case H_KEYDATA:
+ /* Simple case, one key on page; move it to dup page. */
+ dndx = 0;
+ dbt.size =
+ LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+ dbt.data =
+ ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+ ret = __db_pitem(hashp->dbp, hcp->dpagep,
+ (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
+ if (ret == 0)
+ __ham_dirty_page(hashp, hcp->dpagep);
+ break;
+ case H_OFFPAGE:
+ /* Simple case, one key on page; move it to dup page. */
+ dndx = 0;
+ memcpy(&ho,
+ P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
+ bo.deleted = 0;
+ bo.type = ho.type;
+ bo.pgno = ho.pgno;
+ bo.tlen = ho.tlen;
+ dbt.size = BOVERFLOW_SIZE;
+ dbt.data = &bo;
+
+ ret = __db_pitem(hashp->dbp, hcp->dpagep,
+ (u_int32_t)dndx, dbt.size, &dbt, NULL);
+ if (ret == 0)
+ __ham_dirty_page(hashp, hcp->dpagep);
+ break;
+ case H_DUPLICATE:
+ p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+ pend = p +
+ LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+
+ for (dndx = 0; p < pend; dndx++) {
+ memcpy(&len, p, sizeof(db_indx_t));
+ dbt.size = len;
+ p += sizeof(db_indx_t);
+ dbt.data = p;
+ p += len + sizeof(db_indx_t);
+ ret = __db_dput(hashp->dbp, &dbt,
+ &hcp->dpagep, &dndx, __ham_overflow_page);
+ if (ret != 0)
+ break;
+ }
+ break;
+ default:
+ ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
+ }
+ if (ret == 0) {
+ /*
+ * Now attach this to the source page in place of
+ * the old duplicate item.
+ */
+ __ham_move_offpage(hashp, hcp->pagep,
+ (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);
+
+ /* Can probably just do a "put" here. */
+ ret = __ham_dirty_page(hashp, hcp->pagep);
+ } else {
+ (void)__ham_del_page(hashp->dbp, hcp->dpagep);
+ hcp->dpagep = NULL;
+ }
+ return (ret);
+}
+
+static int
+__ham_make_dup(notdup, dup, bufp, sizep)
+ const DBT *notdup;
+ DBT *dup;
+ void **bufp;
+ u_int32_t *sizep;
+{
+ db_indx_t tsize, item_size;
+ int ret;
+ u_int8_t *p;
+
+ item_size = (db_indx_t)notdup->size;
+ tsize = DUP_SIZE(item_size);
+ if ((ret = __ham_init_dbt(dup, tsize, bufp, sizep)) != 0)
+ return (ret);
+
+ dup->dlen = 0;
+ dup->flags = notdup->flags;
+ F_SET(dup, DB_DBT_PARTIAL);
+
+ p = dup->data;
+ memcpy(p, &item_size, sizeof(db_indx_t));
+ p += sizeof(db_indx_t);
+ memcpy(p, notdup->data, notdup->size);
+ p += notdup->size;
+ memcpy(p, &item_size, sizeof(db_indx_t));
+
+ dup->doff = 0;
+ dup->dlen = notdup->size;
+
+ return (0);
+}
+
+static int
+__ham_check_move(hashp, hcp, add_len)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ int32_t add_len;
+{
+ DBT k, d;
+ DB_LSN new_lsn;
+ HKEYDATA *hk;
+ PAGE *next_pagep;
+ db_pgno_t next_pgno;
+ int rectype, ret;
+ u_int32_t new_datalen, old_len;
+
+ /*
+ * Check if we can do whatever we need to on this page. If not,
+ * then we'll have to move the current element to a new page.
+ */
+
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+
+ /*
+ * If the item is already off page duplicates or an offpage item,
+ * then we know we can do whatever we need to do in-place
+ */
+ if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE)
+ return (0);
+
+ old_len =
+ LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+ new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
+
+ /*
+ * We need to add a new page under two conditions:
+ * 1. The addition makes the total data length cross the BIG
+ * threshold and the OFFDUP structure won't fit on this page.
+ * 2. The addition does not make the total data cross the
+ * threshold, but the new data won't fit on the page.
+ * If neither of these is true, then we can return.
+ */
+ if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
+ HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
+ return (0);
+
+ if (!ISBIG(hashp, new_datalen) &&
+ add_len <= (int32_t)P_FREESPACE(hcp->pagep))
+ return (0);
+
+ /*
+ * If we get here, then we need to move the item to a new page.
+ * Check if there are more pages in the chain.
+ */
+
+ new_datalen = ISBIG(hashp, new_datalen) ?
+ HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
+
+ next_pagep = NULL;
+ for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
+ next_pgno = NEXT_PGNO(next_pagep)) {
+ if (next_pagep != NULL &&
+ (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
+ return (ret);
+
+ if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
+ return (ret);
+
+ if (P_FREESPACE(next_pagep) >= new_datalen)
+ break;
+ }
+
+ /* No more pages, add one. */
+ if (next_pagep == NULL &&
+ (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
+ return (ret);
+
+ /* Add new page at the end of the chain. */
+ if (P_FREESPACE(next_pagep) < new_datalen &&
+ (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
+ return (ret);
+
+ /* Copy the item to the new page. */
+ if (DB_LOGGING(hashp->dbp)) {
+ rectype = PUTPAIR;
+ k.flags = 0;
+ d.flags = 0;
+ if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) {
+ rectype |= PAIR_KEYMASK;
+ k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
+ k.size = HOFFPAGE_SIZE;
+ } else {
+ k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data;
+ k.size = LEN_HKEY(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ }
+
+ if (hk->type == H_OFFPAGE) {
+ rectype |= PAIR_DATAMASK;
+ d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ d.size = HOFFPAGE_SIZE;
+ } else {
+ d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
+ d.size = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ }
+
+
+ if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
+ hashp->dbp->log_fileid, PGNO(next_pagep),
+ (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
+ &k, &d)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(next_pagep) = new_lsn; /* Structure assignment. */
+ }
+
+ __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
+ __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
+
+ /* Now delete the pair from the current page. */
+ ret = __ham_del_pair(hashp, hcp);
+
+ (void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
+ hcp->pagep = next_pagep;
+ hcp->pgno = PGNO(hcp->pagep);
+ hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
+ F_SET(hcp, H_EXPAND);
+ return (ret);
+}
+
+/*
+ * Replace an onpage set of duplicates with the OFFDUP structure that
+ * references the duplicate page.
+ * XXX This is really just a special case of __onpage_replace; we should
+ * probably combine them.
+ * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
+ */
+void
+__ham_move_offpage(hashp, pagep, ndx, pgno)
+ HTAB *hashp;
+ PAGE *pagep;
+ u_int32_t ndx;
+ db_pgno_t pgno;
+{
+ DBT new_dbt;
+ DBT old_dbt;
+ HOFFDUP od;
+ db_indx_t i;
+ int32_t shrink;
+ u_int8_t *src;
+
+ od.type = H_OFFDUP;
+ od.pgno = pgno;
+
+ if (DB_LOGGING(hashp->dbp)) {
+ new_dbt.data = &od;
+ new_dbt.size = HOFFDUP_SIZE;
+ old_dbt.data = P_ENTRY(pagep, ndx);
+ old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
+ (void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
+ hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
+ &LSN(pagep), -1, &old_dbt, &new_dbt, 0);
+ }
+
+ shrink =
+ LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;
+
+ if (shrink != 0) {
+ /* Copy data. */
+ src = (u_int8_t *)(pagep) + HOFFSET(pagep);
+ memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
+ HOFFSET(pagep) += shrink;
+
+ /* Update index table. */
+ for (i = ndx; i < NUM_ENT(pagep); i++)
+ pagep->inp[i] += shrink;
+ }
+
+ /* Now copy the offdup entry onto the page. */
+ memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);
+}