summary refs log tree commit diff
path: root/src/hash
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@baserock.org> 2015-02-17 17:25:57 +0000
committer <> 2015-03-17 16:26:24 +0000
commit 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch)
tree 598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/hash
parent 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff)
download berkeleydb-master.tar.gz
Imported from /home/lorry/working-area/delta_berkeleydb/db-6.1.23.tar.gz. (HEAD, db-6.1.23, master)
Diffstat (limited to 'src/hash')
-rw-r--r-- src/hash/hash.c 178
-rw-r--r-- src/hash/hash.src 2
-rw-r--r-- src/hash/hash_compact.c 24
-rw-r--r-- src/hash/hash_conv.c 9
-rw-r--r-- src/hash/hash_dup.c 7
-rw-r--r-- src/hash/hash_func.c 2
-rw-r--r-- src/hash/hash_meta.c 2
-rw-r--r-- src/hash/hash_method.c 12
-rw-r--r-- src/hash/hash_open.c 44
-rw-r--r-- src/hash/hash_page.c 126
-rw-r--r-- src/hash/hash_rec.c 8
-rw-r--r-- src/hash/hash_reclaim.c 2
-rw-r--r-- src/hash/hash_stat.c 16
-rw-r--r-- src/hash/hash_stub.c 36
-rw-r--r-- src/hash/hash_upgrade.c 93
-rw-r--r-- src/hash/hash_verify.c 180
16 files changed, 633 insertions, 108 deletions
diff --git a/src/hash/hash.c b/src/hash/hash.c
index ae5736e7..5bff1dee 100644
--- a/src/hash/hash.c
+++ b/src/hash/hash.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
@@ -298,6 +298,7 @@ __hamc_count(dbc, recnop)
}
switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
+ case H_BLOB:
case H_KEYDATA:
case H_OFFPAGE:
recno = 1;
@@ -379,7 +380,7 @@ __hamc_del(dbc, flags)
hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_DELETED))
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
if ((ret = __ham_get_meta(dbc)) != 0)
goto out;
@@ -535,7 +536,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
case DB_CURRENT:
/* cgetchk has already determined that the cursor is set. */
if (F_ISSET(hcp, H_DELETED)) {
- ret = DB_KEYEMPTY;
+ ret = DBC_ERR(dbc, DB_KEYEMPTY);
goto err;
}
@@ -554,7 +555,8 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
if (ret != 0 && ret != DB_NOTFOUND)
goto err;
else if (F_ISSET(hcp, H_OK)) {
- if (*pgnop == PGNO_INVALID)
+ if (*pgnop == PGNO_INVALID && HPAGE_PTYPE(
+ H_PAIRDATA(dbp, hcp->page, hcp->indx)) != H_BLOB)
ret = __ham_dup_return(dbc, data, flags);
break;
} else if (!F_ISSET(hcp, H_NOMORE)) {
@@ -576,7 +578,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
dbc->thread_info, hcp->page, dbc->priority);
hcp->page = NULL;
if (hcp->bucket == 0) {
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
hcp->pgno = PGNO_INVALID;
goto err;
}
@@ -598,7 +600,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
F_CLR(hcp, H_ISDUP);
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
if (hcp->bucket > hcp->hdr->max_bucket) {
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
hcp->pgno = PGNO_INVALID;
goto err;
}
@@ -612,7 +614,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
case DB_SET:
case DB_SET_RANGE:
/* Key not found. */
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
goto err;
case DB_CURRENT:
/*
@@ -621,7 +623,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop);
* locking. We return the same error code as we would
* if the cursor were deleted.
*/
- ret = DB_KEYEMPTY;
+ ret = DBC_ERR(dbc, DB_KEYEMPTY);
goto err;
default:
DB_ASSERT(env, 0);
@@ -649,11 +651,14 @@ __ham_bulk(dbc, data, flags)
DB *dbp;
DB_MPOOLFILE *mpf;
HASH_CURSOR *cp;
+ HBLOB hblob;
PAGE *pg;
db_indx_t dup_len, dup_off, dup_tlen, indx, *inp;
db_lockmode_t lock_mode;
db_pgno_t pgno;
+ off_t blob_size;
int32_t *endp, *offp, *saveoff;
+ db_seq_t blob_id;
u_int32_t key_off, key_size, pagesize, size, space;
u_int8_t *dbuf, *dp, *hk, *np, *tmp;
int is_dup, is_key;
@@ -708,6 +713,10 @@ next_pg:
space -= key_size;
key_off = (u_int32_t)(np - dbuf);
np += key_size;
+ } else if (HPAGE_PTYPE(hk) == H_BLOB) {
+ __db_errx(dbp->env, DB_STR("1185",
+ "Blob item key."));
+ (void)__env_panic(dbp->env, DB_RUNRECOVERY);
} else {
if (need_pg) {
dp = np;
@@ -982,6 +991,38 @@ get_space:
np += size;
space -= size;
break;
+ case H_BLOB:
+ space -= (is_key ? 4 : 2) * sizeof(*offp);
+ if (space > data->ulen)
+ goto back_up;
+
+ memcpy(&hblob, hk, HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ GET_BLOB_SIZE(dbc->env, hblob, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ if (blob_size > UINT32_MAX) {
+ size = UINT32_MAX;
+ goto back_up;
+ }
+ size = (u_int32_t)blob_size;
+ if (size > space)
+ goto back_up;
+
+ if ((ret = __blob_bulk(dbc, size, blob_id, np)) != 0)
+ return (ret);
+
+ if (is_key) {
+ *offp-- = (int32_t)key_off;
+ *offp-- = (int32_t)key_size;
+ }
+
+ *offp-- = (int32_t)(np - dbuf);
+ *offp-- = (int32_t)size;
+
+ np += size;
+ space -= size;
+ break;
default:
/* Do nothing. */
break;
@@ -1014,7 +1055,7 @@ get_space:
* DBC->get(DB_NEXT) will return DB_NOTFOUND.
*/
cp->bucket--;
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
} else {
/*
* Start on the next bucket.
@@ -1071,7 +1112,7 @@ __hamc_put(dbc, key, data, flags, pgnop)
if (F_ISSET(hcp, H_DELETED) && flags != DB_KEYFIRST &&
flags != DB_KEYLAST && flags != DB_OVERWRITE_DUP)
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
if ((ret = __ham_get_meta(dbc)) != 0)
goto err1;
@@ -1083,9 +1124,15 @@ __hamc_put(dbc, key, data, flags, pgnop)
case DB_NOOVERWRITE:
case DB_OVERWRITE_DUP:
nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
- HKEYDATA_PSIZE(key->size)) +
- (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
- HKEYDATA_PSIZE(data->size));
+ HKEYDATA_PSIZE(key->size));
+ if (dbp->blob_threshold && (data->size >=
+ dbp->blob_threshold || F_ISSET(data, DB_DBT_BLOB)))
+ nbytes += HBLOB_PSIZE;
+ else if (ISBIG(hcp, data->size))
+ nbytes += HOFFPAGE_PSIZE;
+ else
+ nbytes += HKEYDATA_PSIZE(data->size);
+
if ((ret = __ham_lookup(dbc,
key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) {
if (hcp->seek_found_page != PGNO_INVALID &&
@@ -1124,7 +1171,7 @@ __hamc_put(dbc, key, data, flags, pgnop)
} else if (ret == 0 && flags == DB_NOOVERWRITE &&
!F_ISSET(hcp, H_DELETED)) {
if (*pgnop == PGNO_INVALID)
- ret = DB_KEYEXIST;
+ ret = DBC_ERR(dbc, DB_KEYEXIST);
else
ret = __bam_opd_exists(dbc, *pgnop);
if (ret != 0)
@@ -1468,6 +1515,7 @@ __ham_dup_return(dbc, val, flags)
type = HPAGE_TYPE(dbp, hcp->page, ndx);
pp = hcp->page;
myval = val;
+ cmp = 0;
/*
* There are 4 cases:
@@ -1545,9 +1593,13 @@ __ham_dup_return(dbc, val, flags)
memcpy(&pgno,
HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
if ((ret = __db_moff(dbc, val, pgno, tlen,
- dbp->dup_compare, &cmp)) != 0)
+ dbp->dup_compare, &cmp, NULL)) != 0)
return (ret);
cmp = -cmp;
+ } else if (((HKEYDATA *)hk)->type == H_BLOB) {
+ __db_errx(dbp->env, DB_STR("1186",
+ "Error - found a blob file in a duplicate data set."));
+ (void)__env_panic(dbp->env, DB_RUNRECOVERY);
} else {
/*
* We do not zero tmp_val since the comparison
@@ -1557,8 +1609,8 @@ __ham_dup_return(dbc, val, flags)
tmp_val.size = LEN_HDATA(dbp, hcp->page,
dbp->pgsize, hcp->indx);
cmp = dbp->dup_compare == NULL ?
- __bam_defcmp(dbp, &tmp_val, val) :
- dbp->dup_compare(dbp, &tmp_val, val);
+ __bam_defcmp(dbp, &tmp_val, val, NULL) :
+ dbp->dup_compare(dbp, &tmp_val, val, NULL);
}
if (cmp > 0 && flags == DB_GET_BOTH_RANGE &&
@@ -1567,7 +1619,7 @@ __ham_dup_return(dbc, val, flags)
}
if (cmp != 0)
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
}
/*
@@ -1654,17 +1706,21 @@ __ham_overwrite(dbc, nval, flags)
u_int32_t flags;
{
DB *dbp;
- DBT *myval, tmp_val, tmp_val2;
+ DBT *myval, tmp_val, tmp_val2, old_rec, new_rec;
ENV *env;
HASH_CURSOR *hcp;
+ HBLOB hblob;
void *newrec;
u_int8_t *hk, *p;
u_int32_t len, nondup_size;
+ db_seq_t blob_id, new_blob_id;
db_indx_t newsize;
+ off_t blob_size;
int ret;
dbp = dbc->dbp;
env = dbp->env;
+ ret = 0;
hcp = (HASH_CURSOR *)dbc->internal;
if (F_ISSET(hcp, H_ISDUP)) {
/*
@@ -1717,7 +1773,7 @@ __ham_overwrite(dbc, nval, flags)
NULL, nval, flags, NULL));
}
- if ((ret = __os_malloc(dbp->env,
+ if ((ret = __os_malloc(env,
DUP_SIZE(newsize), &newrec)) != 0)
return (ret);
memset(&tmp_val2, 0, sizeof(tmp_val2));
@@ -1765,7 +1821,7 @@ __ham_overwrite(dbc, nval, flags)
(u_int8_t *)newrec + sizeof(db_indx_t);
tmp_val2.size = newsize;
if (dbp->dup_compare(
- dbp, &tmp_val, &tmp_val2) != 0) {
+ dbp, &tmp_val, &tmp_val2, NULL) != 0) {
__os_free(env, newrec);
return (__db_duperr(dbp, flags));
}
@@ -1816,7 +1872,7 @@ __ham_overwrite(dbc, nval, flags)
sizeof(db_indx_t);
tmp_val2.size = hcp->dup_len;
if (dbp->dup_compare(
- dbp, nval, &tmp_val2) != 0) {
+ dbp, nval, &tmp_val2, NULL) != 0) {
__db_errx(env, DB_STR("1131",
"Existing data sorts differently from put data"));
return (EINVAL);
@@ -1848,16 +1904,84 @@ __ham_overwrite(dbc, nval, flags)
hcp->dup_len = (db_indx_t)nval->size;
}
myval = &tmp_val;
+ goto end;
+ }
+ hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
+ if (HPAGE_PTYPE(hk) == H_BLOB) {
+ memcpy(&hblob, hk, HBLOB_SIZE);
+ memset(&old_rec, 0, sizeof(DBT));
+ memset(&new_rec, 0, sizeof(DBT));
+ if (DBC_LOGGING(dbc)) {
+ new_rec.data = HKEYDATA_DATA(&hblob);
+ if ((ret = __os_malloc(
+ env, HBLOB_SIZE, &old_rec.data)) != 0)
+ return (ret);
+ memcpy(old_rec.data,
+ HKEYDATA_DATA(&hblob), HBLOB_DSIZE);
+ new_rec.size = old_rec.size = HBLOB_DSIZE;
+ }
+ /*
+ * Inserting a blob record instead of blob data, only
+ * used internally by the DB_STREAM api.
+ */
+ if (F_ISSET(nval, DB_DBT_BLOB_REC)) {
+ DB_ASSERT(env, nval->size == HBLOB_SIZE);
+ DB_ASSERT(env, HPAGE_PTYPE(nval->data) == H_BLOB);
+ memcpy(&hblob, nval->data, nval->size);
+ } else {
+ /*
+ * A blob file overwrite is simpler than other
+ * replace operations. It's simply a matter of
+ * deleting the old blob file, and creating a
+ * new one. We may need to be careful of
+ * cursors when we have support for blob
+ * cursors.
+ * That means that we can skip the replpair
+ * call.
+ */
+ blob_id = (db_seq_t)hblob.id;
+ GET_BLOB_SIZE(env, hblob, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ if ((ret = __blob_repl(dbc,
+ nval, blob_id, &new_blob_id, &blob_size)) == 0) {
+ SET_BLOB_ID(&hblob, new_blob_id, HBLOB);
+ SET_BLOB_SIZE(&hblob, blob_size, HBLOB);
+ }
+ }
+ if (ret == 0) {
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __ham_replace_log(dbp,
+ dbc->txn, &LSN(hcp->page), 0,
+ PGNO(hcp->page),
+ (u_int32_t)H_DATAINDEX(hcp->indx),
+ &LSN(hcp->page), 0,
+ OP_SET(H_BLOB, hcp->page), &old_rec,
+ OP_SET(H_BLOB, hcp->page),
+ &new_rec)) != 0) {
+ memcpy(HKEYDATA_DATA(&hblob),
+ old_rec.data, HBLOB_DSIZE);
+ __os_free(env, old_rec.data);
+ return (ret);
+ }
+
+ } else
+ LSN_NOT_LOGGED(LSN(hcp->page));
+ }
+ /* Copy the updated blob data back to the page. */
+ memcpy(hk, &hblob, HBLOB_SIZE);
+ if (old_rec.data != NULL)
+ __os_free(env, old_rec.data);
+ return (ret);
} else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
/* Put/overwrite */
memcpy(&tmp_val, nval, sizeof(*nval));
F_SET(&tmp_val, DB_DBT_PARTIAL);
tmp_val.doff = 0;
- hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
- if (HPAGE_PTYPE(hk) == H_OFFPAGE)
+ if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
memcpy(&tmp_val.dlen,
HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- else
+ } else
tmp_val.dlen = LEN_HDATA(dbp, hcp->page,
hcp->hdr->dbmeta.pagesize, hcp->indx);
myval = &tmp_val;
@@ -1865,7 +1989,7 @@ __ham_overwrite(dbc, nval, flags)
/* Regular partial put */
myval = nval;
- return (__ham_replpair(dbc, myval,
+end: return (__ham_replpair(dbc, myval,
F_ISSET(hcp, H_ISDUP) ? H_DUPLICATE : H_KEYDATA));
}
@@ -1955,7 +2079,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop)
return (ret);
}
F_SET(hcp, H_NOMORE);
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
}
/*
diff --git a/src/hash/hash.src b/src/hash/hash.src
index e544c6f3..f56a9c5b 100644
--- a/src/hash/hash.src
+++ b/src/hash/hash.src
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/hash/hash_compact.c b/src/hash/hash_compact.c
index 83b5ffb1..79fb6004 100644
--- a/src/hash/hash_compact.c
+++ b/src/hash/hash_compact.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
* $Id$
*/
@@ -118,7 +118,8 @@ __ham_compact_int(dbc, start, stop, factor, c_data, donep, flags)
break;
origpgno = pgno;
if ((ret = __db_truncate_root(dbc, hcp->page,
- H_DATAINDEX(hcp->indx), &pgno, 0)) != 0)
+ H_DATAINDEX(hcp->indx),
+ &pgno, 0, &pgs_done)) != 0)
break;
if (pgno != origpgno) {
memcpy(HOFFDUP_PGNO(H_PAIRDATA(dbp,
@@ -247,7 +248,7 @@ __ham_compact_bucket(dbc, c_data, pgs_donep)
if (check_trunc && PREV_PGNO(pg) != PGNO_INVALID &&
PGNO(pg) > c_data->compact_truncate &&
(ret = __db_exchange_page(dbc, &pg,
- hcp->page, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+ hcp->page, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
break;
if (pgno != PGNO(pg))
(*pgs_donep)++;
@@ -400,8 +401,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done)
if ((ret = __memp_dirty(dbp->mpf, &hcp->page,
dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
return (ret);
- if ((ret =
- __db_truncate_root(dbc, hcp->page, indx, &pgno, 0)) != 0)
+ if ((ret = __db_truncate_root(dbc,
+ hcp->page, indx, &pgno, 0, pgs_done)) != 0)
return (ret);
if (pgno != origpgno) {
memcpy(HOFFPAGE_PGNO(P_ENTRY(dbp, hcp->page, indx)),
@@ -410,7 +411,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done)
c_data->compact_pages--;
}
}
- if ((ret = __db_truncate_overflow(dbc, pgno, NULL, c_data)) != 0)
+ if ((ret =
+ __db_truncate_overflow(dbc, pgno, NULL, c_data, pgs_done)) != 0)
return (ret);
return (0);
}
@@ -434,10 +436,11 @@ __ham_compact_hash(dbp, ip, txn, c_data)
HMETA *meta;
PAGE *oldpage;
db_pgno_t free_pgno, last_pgno, pgno, start_pgno;
- int flags, local_txn, ret, t_ret;
+ int flags, local_txn, pgs_done, ret, t_ret;
u_int32_t bucket, i, size;
local_txn = IS_DB_AUTO_COMMIT(dbp, txn);
+ pgs_done = 0;
oldpage = NULL;
dbc = NULL;
LOCK_INIT(lock);
@@ -506,8 +509,8 @@ __ham_compact_hash(dbp, ip, txn, c_data)
flags = 0;
else
flags = DB_EXCH_FREE;
- if ((ret = __db_exchange_page(dbc,
- &oldpage, NULL, free_pgno, flags)) != 0)
+ if ((ret = __db_exchange_page(dbc, &oldpage,
+ NULL, free_pgno, flags, &pgs_done)) != 0)
goto err;
} else if (pgno >= last_pgno) {
if ((ret = __db_free(dbc, oldpage, 0)) != 0)
@@ -526,7 +529,8 @@ __ham_compact_hash(dbp, ip, txn, c_data)
}
if (ret == 0 && F_ISSET(dbp, DB_AM_SUBDB) &&
PGNO(hcp->hdr) > c_data->compact_truncate)
- ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr, c_data);
+ ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr,
+ c_data, &pgs_done);
err: if (oldpage != NULL && (t_ret = __memp_fput(dbp->mpf,
dbc->thread_info, oldpage, dbc->priority)) != 0 && ret == 0)
diff --git a/src/hash/hash_conv.c b/src/hash/hash_conv.c
index fa084f2a..7a53a037 100644
--- a/src/hash/hash_conv.c
+++ b/src/hash/hash_conv.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -104,7 +104,12 @@ __ham_mswap(env, pg)
SWAP32(p); /* h_charkey */
for (i = 0; i < NCACHED; ++i)
SWAP32(p); /* spares */
- p += 59 * sizeof(u_int32_t); /* unused */
+ SWAP32(p); /* threshold */
+ SWAP32(p); /* file id lo */
+ SWAP32(p); /* file id hi */
+ SWAP32(p); /* sdb id lo */
+ SWAP32(p); /* sdb id hi */
+ p += 54 * sizeof(u_int32_t); /* unused */
SWAP32(p); /* crypto_magic */
return (0);
}
diff --git a/src/hash/hash_dup.c b/src/hash/hash_dup.c
index 879c33d7..523d7227 100644
--- a/src/hash/hash_dup.c
+++ b/src/hash/hash_dup.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
@@ -368,6 +368,7 @@ finish: if (ret == 0) {
off += len + 2 * sizeof(db_indx_t);
}
break;
+ case H_BLOB:
default:
ret = __db_pgfmt(env, hcp->pgno);
break;
@@ -772,7 +773,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags)
DBT cur;
HASH_CURSOR *hcp;
db_indx_t i, len;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
u_int8_t *data;
dbp = dbc->dbp;
@@ -794,7 +795,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags)
* we're done. In the latter case, if permitting partial
* matches, it's not a failure.
*/
- *cmpp = func(dbp, dbt, &cur);
+ *cmpp = func(dbp, dbt, &cur, NULL);
if (*cmpp == 0)
break;
if (*cmpp < 0 && dbp->dup_compare != NULL) {
diff --git a/src/hash/hash_func.c b/src/hash/hash_func.c
index baf6061c..1e83b00a 100644
--- a/src/hash/hash_func.c
+++ b/src/hash/hash_func.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
diff --git a/src/hash/hash_meta.c b/src/hash/hash_meta.c
index d9a35cb4..aefdffb8 100644
--- a/src/hash/hash_meta.c
+++ b/src/hash/hash_meta.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/hash/hash_method.c b/src/hash/hash_method.c
index 1da81e70..a05bcea6 100644
--- a/src/hash/hash_method.c
+++ b/src/hash/hash_method.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -20,7 +20,7 @@ static int __ham_set_h_hash
static int __ham_set_h_nelem __P((DB *, u_int32_t));
static int __ham_get_h_compare
- __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
+ __P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
/*
* __ham_db_create --
@@ -153,7 +153,7 @@ __ham_set_h_hash(dbp, func)
static int
__ham_get_h_compare(dbp, funcp)
DB *dbp;
- int (**funcp) __P((DB *, const DBT *, const DBT *));
+ int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *));
{
HASH *t;
@@ -170,13 +170,13 @@ __ham_get_h_compare(dbp, funcp)
* __ham_set_h_compare --
* Set the comparison function.
*
- * PUBLIC: int __ham_set_h_compare
- * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ * PUBLIC: int __ham_set_h_compare __P((DB *,
+ * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *)));
*/
int
__ham_set_h_compare(dbp, func)
DB *dbp;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
{
HASH *t;
diff --git a/src/hash/hash_open.c b/src/hash/hash_open.c
index 3d0bb220..0104a57f 100644
--- a/src/hash/hash_open.c
+++ b/src/hash/hash_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
@@ -44,6 +44,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/blob.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
#include "dbinc/hash.h"
@@ -149,6 +150,7 @@ __ham_metachk(dbp, name, hashm)
int ret;
env = dbp->env;
+ ret = 0;
/*
* At this point, all we know is that the magic number is for a Hash.
@@ -168,6 +170,7 @@ __ham_metachk(dbp, name, hashm)
case 7:
case 8:
case 9:
+ case 10:
break;
default:
__db_errx(env, DB_STR_A("1126",
@@ -230,6 +233,29 @@ __ham_metachk(dbp, name, hashm)
/* Set the page size. */
dbp->pgsize = hashm->dbmeta.pagesize;
+ dbp->blob_threshold = hashm->blob_threshold;
+ GET_BLOB_FILE_ID(env, hashm, dbp->blob_file_id, ret);
+ if (ret != 0)
+ return (ret);
+ GET_BLOB_SDB_ID(env, hashm, dbp->blob_sdb_id, ret);
+ if (ret != 0)
+ return (ret);
+ /* Blob databases must be upgraded. */
+ if (vers == 9 && (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0)) {
+ __db_errx(env, DB_STR_A("1208",
+"%s: databases that support blobs must be upgraded.", "%s"),
+ name);
+ return (EINVAL);
+ }
+#ifndef HAVE_64BIT_TYPES
+ if (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0) {
+ __db_errx(env, DB_STR_A("1202",
+ "%s: blobs require 64 integer compiler support.", "%s"),
+ name);
+ return (EINVAL);
+ }
+#endif
+
/* Copy the file's ID. */
memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);
@@ -297,6 +323,9 @@ __ham_init_meta(dbp, meta, pgno, lsnp)
meta->nelem = hashp->h_nelem;
meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY));
memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+ meta->blob_threshold = dbp->blob_threshold;
+ SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, HMETA);
+ SET_BLOB_META_SDB_ID(meta, dbp->blob_sdb_id, HMETA);
if (F_ISSET(dbp, DB_AM_DUP))
F_SET(&meta->dbmeta, DB_HASH_DUP);
@@ -414,6 +443,12 @@ __ham_new_file(dbp, ip, txn, fhp, name)
F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
pdbt.data = &pginfo;
pdbt.size = sizeof(pginfo);
+ if (dbp->blob_threshold) {
+ if ((ret = __blob_generate_dir_ids(
+ dbp, txn, &dbp->blob_file_id)) != 0)
+ return (ret);
+
+ }
if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0)
return (ret);
meta = (HMETA *)buf;
@@ -491,6 +526,13 @@ __ham_new_subdb(mdbp, dbp, ip, txn)
LOCK_INIT(metalock);
LOCK_INIT(mmlock);
+ if (dbp->blob_threshold) {
+ if ((ret = __blob_generate_dir_ids(
+ dbp, txn, &dbp->blob_sdb_id)) != 0)
+ return (ret);
+
+ }
+
if ((ret = __db_cursor(mdbp, ip, txn,
&dbc, CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0)
return (ret);
diff --git a/src/hash/hash_page.c b/src/hash/hash_page.c
index 7576fe61..8e0f897d 100644
--- a/src/hash/hash_page.c
+++ b/src/hash/hash_page.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
@@ -129,7 +129,7 @@ recheck:
/* Fetch next page. */
if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
F_SET(hcp, H_NOMORE);
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
}
next_pgno = NEXT_PGNO(hcp->page);
hcp->indx = 0;
@@ -344,7 +344,7 @@ __ham_item_prev(dbc, mode, pgnop)
if (hcp->pgno == PGNO_INVALID) {
/* Beginning of bucket. */
F_SET(hcp, H_NOMORE);
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
} else if ((ret =
__ham_next_cpage(dbc, hcp->pgno)) != 0)
return (ret);
@@ -371,7 +371,7 @@ __ham_item_prev(dbc, mode, pgnop)
if (hcp->indx == 0) {
/* Bucket was empty. */
F_SET(hcp, H_NOMORE);
- return (DB_NOTFOUND);
+ return (DBC_ERR(dbc, DB_NOTFOUND));
}
}
@@ -497,7 +497,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type)
inp = P_INP(dbp, p);
ksize = (key_type == H_OFFPAGE) ?
key_dbt->size : HKEYDATA_SIZE(key_dbt->size);
- dsize = (data_type == H_OFFPAGE || data_type == H_OFFDUP) ?
+ dsize = (data_type == H_OFFPAGE ||
+ data_type == H_OFFDUP || data_type == H_BLOB) ?
data_dbt->size : HKEYDATA_SIZE(data_dbt->size);
increase = ksize + dsize;
@@ -579,7 +580,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type)
else
PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data,
key_dbt->size, key_type);
- if (data_type == H_OFFPAGE || data_type == H_OFFDUP)
+ if (data_type == H_BLOB ||
+ data_type == H_OFFPAGE || data_type == H_OFFDUP)
memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data,
data_dbt->size);
else
@@ -618,6 +620,8 @@ __ham_getindex(dbc, p, key, key_type, match, indx)
{
/* Since all entries are key/data pairs. */
DB_ASSERT(dbc->env, NUM_ENT(p)%2 == 0 );
+ /* Blob files can only be stored as data items. */
+ DB_ASSERT(dbc->env, key_type != H_BLOB );
/* Support pre 4.6 unsorted hash pages. */
if (p->type == P_HASH_UNSORTED)
@@ -672,7 +676,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx)
memcpy(&pgno,
HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
if ((ret = __db_moff(dbc, key, pgno, tlen,
- t->h_compare, &res)) != 0)
+ t->h_compare, &res, NULL)) != 0)
return (ret);
}
break;
@@ -681,7 +685,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx)
DB_INIT_DBT(pg_dbt,
HKEYDATA_DATA(hk), key->size);
if (t->h_compare(
- dbp, key, &pg_dbt) != 0)
+ dbp, key, &pg_dbt, NULL) != 0)
break;
} else if (key->size ==
LEN_HKEY(dbp, p, dbp->pgsize, i))
@@ -784,7 +788,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
(void)__ua_memcpy(&off_pgno,
HOFFPAGE_PGNO(offp), sizeof(db_pgno_t));
if ((ret = __db_moff(dbc, key, off_pgno,
- itemlen, t->h_compare, &res)) != 0)
+ itemlen, t->h_compare, &res, NULL)) != 0)
return (ret);
}
} else {
@@ -799,7 +803,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
(void)__ua_memcpy(&off_len, HOFFPAGE_TLEN(offp),
sizeof(u_int32_t));
if ((ret = __db_moff(dbc, &tmp_dbt, off_pgno,
- off_len, t->h_compare, &res)) != 0)
+ off_len, t->h_compare, &res, NULL)) != 0)
return (ret);
/*
* Since we switched the key/match parameters
@@ -810,7 +814,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp)
} else if (t->h_compare != NULL) {
/* Case 4, with a user comparison func */
DB_INIT_DBT(tmp_dbt, data, itemlen);
- res = t->h_compare(dbp, key, &tmp_dbt);
+ res = t->h_compare(dbp, key, &tmp_dbt, NULL);
} else {
/* Case 4, without a user comparison func */
if ((res = memcmp(key->data, data,
@@ -899,8 +903,8 @@ __ham_verify_sorted_page (dbc, p)
sizeof(u_int32_t));
memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i-2)),
sizeof(db_pgno_t));
- if ((ret = __db_moff(dbc,
- &curr_dbt, tpgno, tlen, t->h_compare, &res)) != 0)
+ if ((ret = __db_moff(dbc, &curr_dbt,
+ tpgno, tlen, t->h_compare, &res, NULL)) != 0)
return (ret);
} else if (HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) {
memset(&prev_dbt, 0, sizeof(prev_dbt));
@@ -910,8 +914,8 @@ __ham_verify_sorted_page (dbc, p)
sizeof(u_int32_t));
memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i)),
sizeof(db_pgno_t));
- if ((ret = __db_moff(dbc,
- &prev_dbt, tpgno, tlen, t->h_compare, &res)) != 0)
+ if ((ret = __db_moff(dbc, &prev_dbt, tpgno, tlen,
+ t->h_compare, &res, NULL)) != 0)
return (ret);
} else
res = memcmp(prev, curr, min(curr_len, prev_len));
@@ -1047,9 +1051,11 @@ __ham_del_pair(dbc, flags, ppg)
DBT data_dbt, key_dbt;
DB_LSN new_lsn, *n_lsn, tmp_lsn;
DB_MPOOLFILE *mpf;
+ HBLOB hblob;
HASH_CURSOR *hcp;
PAGE *n_pagep, *nn_pagep, *p, *p_pagep;
db_ham_mode op;
+ db_seq_t blob_id;
db_indx_t ndx;
db_pgno_t chg_pgno, pgno, tmp_pgno;
u_int32_t data_type, key_type, order;
@@ -1067,6 +1073,8 @@ __ham_del_pair(dbc, flags, ppg)
DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &hcp->page)) != 0)
return (ret);
p = hcp->page;
+ key_type = HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx));
+ data_type = HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx));
/*
* We optimize for the normal case which is when neither the key nor
@@ -1075,8 +1083,7 @@ __ham_del_pair(dbc, flags, ppg)
* to remove the big item and then update the page to remove the
* entry referring to the big item.
*/
- if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) &&
- HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) {
+ if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && key_type == H_OFFPAGE) {
memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_doff(dbc, pgno);
@@ -1084,7 +1091,13 @@ __ham_del_pair(dbc, flags, ppg)
ret = 0;
if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && ret == 0)
- switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) {
+ switch (data_type) {
+ case H_BLOB:
+ memcpy(&hblob,
+ P_ENTRY(dbp, p, H_DATAINDEX(ndx)), HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ ret = __blob_del(dbc, blob_id);
+ break;
case H_OFFPAGE:
memcpy(&pgno,
HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))),
@@ -1111,7 +1124,7 @@ __ham_del_pair(dbc, flags, ppg)
/* Now log the delete off this page. */
if (DBC_LOGGING(dbc)) {
hk = H_PAIRKEY(dbp, hcp->page, ndx);
- if ((key_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) {
+ if (key_type == H_OFFPAGE) {
key_dbt.data = hk;
key_dbt.size = HOFFPAGE_SIZE;
} else {
@@ -1120,9 +1133,12 @@ __ham_del_pair(dbc, flags, ppg)
LEN_HKEY(dbp, hcp->page, dbp->pgsize, ndx);
}
hk = H_PAIRDATA(dbp, hcp->page, ndx);
- if ((data_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) {
+ if (data_type == H_OFFPAGE) {
data_dbt.data = hk;
data_dbt.size = HOFFPAGE_SIZE;
+ } else if (data_type == H_BLOB) {
+ data_dbt.data = hk;
+ data_dbt.size = HBLOB_SIZE;
} else if (data_type == H_OFFDUP) {
data_dbt.data = hk;
data_dbt.size = HOFFDUP_SIZE;
@@ -1404,6 +1420,8 @@ __ham_replpair(dbc, dbt, newtype)
* unless it is an append, when we extend the offpage item, and
* update the HOFFPAGE item on the current page to have the new size
* via a delete/add.
+ *
+ * Updating a record won't cause it to become a blob file or vice versa.
*/
dbp = dbc->dbp;
env = dbp->env;
@@ -2464,15 +2482,18 @@ __ham_add_el(dbc, key, val, type)
const DBT *pkey, *pdata;
DB *dbp;
DBT key_dbt, data_dbt;
- DB_LSN new_lsn;
+ DB_LSN blob_lsn, new_lsn;
DB_MPOOLFILE *mpf;
HASH_CURSOR *hcp;
HOFFPAGE doff, koff;
+ HBLOB dblob;
PAGE *new_pagep;
db_pgno_t next_pgno, pgno;
+ off_t file_size;
+ db_seq_t blob_id;
u_int32_t data_size, data_type, key_size, key_type;
u_int32_t pages, pagespace, pairsize;
- int do_expand, is_keybig, is_databig, match, ret;
+ int do_expand, is_keybig, match, ret;
dbp = dbc->dbp;
mpf = dbp->mpf;
@@ -2485,14 +2506,33 @@ __ham_add_el(dbc, key, val, type)
dbc->thread_info, dbc->txn, DB_MPOOL_CREATE, &hcp->page)) != 0)
return (ret);
+ /*
+ * Key is either:
+ * - On page
+ * - On overflow page(s)
+ */
key_size = HKEYDATA_PSIZE(key->size);
- data_size = HKEYDATA_PSIZE(val->size);
is_keybig = ISBIG(hcp, key->size);
- is_databig = ISBIG(hcp, val->size);
if (is_keybig)
key_size = HOFFPAGE_PSIZE;
- if (is_databig)
+ /*
+ * Data is either:
+ * - On page (H_KEYDATA or H_DUPLICATE)
+ * - On overflow page(s)
+ * - In a blob file
+ */
+ data_type =
+ (dbp->blob_threshold && (val->size >= dbp->blob_threshold ||
+ F_ISSET(val, DB_DBT_BLOB))) ?
+ H_BLOB : (ISBIG(hcp, val->size)) ? H_OFFPAGE : H_KEYDATA;
+ if (data_type == H_KEYDATA || data_type == H_DUPLICATE)
+ data_size = HKEYDATA_PSIZE(val->size);
+ else if (data_type == H_OFFPAGE)
data_size = HOFFPAGE_PSIZE;
+ else { /* H_BLOB */
+ DB_ASSERT(dbp->env, data_type == H_BLOB);
+ data_size = HBLOB_PSIZE;
+ }
pairsize = key_size + data_size;
@@ -2536,17 +2576,17 @@ __ham_add_el(dbc, key, val, type)
* run out of file space before updating the key or data.
*/
if (dbc->txn == NULL &&
- dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) {
+ dbp->mpf->mfp->maxpgno != 0 &&
+ (is_keybig || data_type == H_OFFPAGE)) {
pagespace = P_MAXSPACE(dbp, dbp->pgsize);
pages = 0;
- if (is_databig)
+ if (data_type == H_OFFPAGE)
pages = ((data_size - 1) / pagespace) + 1;
- if (is_keybig) {
+ if (is_keybig)
pages += ((key->size - 1) / pagespace) + 1;
- if (pages >
- (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
- return (__db_space_err(dbp));
- }
+ if (pages >
+ (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
+ return (__db_space_err(dbp));
}
if ((ret = __memp_dirty(mpf,
@@ -2575,7 +2615,7 @@ __ham_add_el(dbc, key, val, type)
key_type = H_KEYDATA;
}
- if (is_databig) {
+ if (data_type == H_OFFPAGE) {
doff.type = H_OFFPAGE;
UMRW_SET(doff.unused[0]);
UMRW_SET(doff.unused[1]);
@@ -2587,6 +2627,22 @@ __ham_add_el(dbc, key, val, type)
data_dbt.size = sizeof(doff);
pdata = &data_dbt;
data_type = H_OFFPAGE;
+ } else if (data_type == H_BLOB) {
+ memset(&dblob, 0, HBLOB_SIZE);
+ dblob.type = H_BLOB;
+ blob_id = 0;
+ file_size = 0;
+ if ((ret = __blob_put(
+ dbc, (DBT *)val, &blob_id, &file_size, &blob_lsn)) != 0)
+ return (ret);
+ SET_BLOB_ID(&dblob, blob_id, HBLOB);
+ SET_BLOB_SIZE(&dblob, file_size, HBLOB);
+ SET_BLOB_FILE_ID(&dblob, dbp->blob_file_id, HBLOB);
+ SET_BLOB_SDB_ID(&dblob, dbp->blob_sdb_id, HBLOB);
+ data_dbt.data = &dblob;
+ data_dbt.size = sizeof(dblob);
+ pdata = &data_dbt;
+ data_type = H_BLOB;
} else {
pdata = val;
data_type = type;
@@ -2673,7 +2729,7 @@ __ham_add_el(dbc, key, val, type)
/*
* Special insert pair call -- copies a key/data pair from one page to
* another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
- * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
+ * H_DUPLICATE, H_OFFDUP, H_BLOB). Since we log splits at a high level, we
* do not need to log them here.
*
* dest_indx is an optional parameter, it serves several purposes:
@@ -2715,7 +2771,7 @@ __ham_copypair(dbc, src_page, src_ndx, dest_page, dest_indx, log)
tkey.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, kindx));
tkey.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, kindx);
}
- if (dtype == H_OFFPAGE || dtype == H_OFFDUP) {
+ if (dtype == H_OFFPAGE || dtype == H_OFFDUP || dtype == H_BLOB) {
tdata.data = P_ENTRY(dbp, src_page, dindx);
tdata.size = LEN_HITEM(dbp, src_page, dbp->pgsize, dindx);
} else {
diff --git a/src/hash/hash_rec.c b/src/hash/hash_rec.c
index 58965569..8a39d880 100644
--- a/src/hash/hash_rec.c
+++ b/src/hash/hash_rec.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
@@ -232,6 +232,7 @@ __ham_insdel_42_recover(env, dbtp, lsnp, op, info)
REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
H_OFFPAGE : H_KEYDATA;
+ /* TODO: May need a PAIR_ISDATABLOB here. */
if (PAIR_ISDATADUP(argp->opcode))
dtype = H_DUPLICATE;
else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
@@ -957,9 +958,8 @@ __ham_metagroup_recover(env, dbtp, lsnp, op, info)
if (IS_ZERO_LSN(LSN(pagep))) {
REC_DIRTY(mpf, ip, dbc->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- PGNO_INVALID, PGNO_INVALID, PGNO_INVALID,
- 0, P_HASH);
+ P_INIT(pagep, file_dbp->pgsize, pgno,
+ PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
}
if ((ret =
__memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
diff --git a/src/hash/hash_reclaim.c b/src/hash/hash_reclaim.c
index ce3f6d9e..55980444 100644
--- a/src/hash/hash_reclaim.c
+++ b/src/hash/hash_reclaim.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/hash/hash_stat.c b/src/hash/hash_stat.c
index 683ce5a6..7ccf472d 100644
--- a/src/hash/hash_stat.c
+++ b/src/hash/hash_stat.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -188,15 +188,19 @@ __ham_stat_print(dbc, flags)
sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");
__db_dl(env,
- "Number of overflow pages", (u_long)sp->hash_bigpages);
- __db_dl_pct(env, "Number of bytes free in overflow pages",
+ "Number of blobs", (u_long)sp->hash_nblobs);
+ __db_dl(env,
+ "Number of hash overflow (big item) pages",
+ (u_long)sp->hash_bigpages);
+ __db_dl_pct(env,
+ "Number of bytes free in hash overflow (big item) pages",
(u_long)sp->hash_big_bfree, DB_PCT_PG(
sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");
__db_dl(env,
"Number of bucket overflow pages", (u_long)sp->hash_overflows);
__db_dl_pct(env,
- "Number of bytes free in bucket overflow pages",
+ "Number of bytes free on bucket overflow pages",
(u_long)sp->hash_ovfl_free, DB_PCT_PG(
sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");
@@ -258,6 +262,9 @@ __ham_stat_callback(dbc, pagep, cookie, putp)
switch (*H_PAIRDATA(dbp, pagep, indx)) {
case H_OFFDUP:
break;
+ case H_BLOB:
+ sp->hash_nblobs++;
+ /* fall through */
case H_OFFPAGE:
case H_KEYDATA:
sp->hash_ndata++;
@@ -480,6 +487,7 @@ __ham_traverse(dbc, mode, callback, cookie, look_past_max)
opgno, callback, cookie)) != 0)
goto err;
break;
+ case H_BLOB:
case H_KEYDATA:
case H_DUPLICATE:
break;
diff --git a/src/hash/hash_stub.c b/src/hash/hash_stub.c
index 57337ea9..89307670 100644
--- a/src/hash/hash_stub.c
+++ b/src/hash/hash_stub.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -127,6 +127,40 @@ __ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
}
int
+__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
+ DB *dbp;
+ char *real_name;
+ u_int32_t flags;
+ DB_FH *fhp;
+ PAGE *h;
+ int *dirtyp;
+{
+ COMPQUIET(real_name, NULL);
+ COMPQUIET(flags, 0);
+ COMPQUIET(fhp, NULL);
+ COMPQUIET(h, NULL);
+ COMPQUIET(dirtyp, NULL);
+ return (__db_no_hash_am(dbp->env)); /* Stub: only dbp is used; the result comes from __db_no_hash_am(). */
+}
+
+int
+__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp)
+ DB *dbp;
+ char *real_name;
+ u_int32_t flags;
+ DB_FH *fhp;
+ PAGE *h;
+ int *dirtyp;
+{
+ COMPQUIET(real_name, NULL);
+ COMPQUIET(flags, 0);
+ COMPQUIET(fhp, NULL);
+ COMPQUIET(h, NULL);
+ COMPQUIET(dirtyp, NULL);
+ return (__db_no_hash_am(dbp->env)); /* Stub: only dbp is used; the result comes from __db_no_hash_am(). */
+}
+
+int
__hamc_cmp(dbc, other_dbc, result)
DBC *dbc, *other_dbc;
int *result;
diff --git a/src/hash/hash_upgrade.c b/src/hash/hash_upgrade.c
index f66a7a58..17014a5c 100644
--- a/src/hash/hash_upgrade.c
+++ b/src/hash/hash_upgrade.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,6 +9,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/blob.h"
#include "dbinc/db_page.h"
#include "dbinc/hash.h"
#include "dbinc/db_upgrade.h"
@@ -321,3 +322,93 @@ __ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp)
return (ret);
}
+
+/*
+ * __ham_60_hashmeta--
+ * Set the version number in the meta page h to 10 and set *dirtyp to 1; always returns 0.
+ *
+ * PUBLIC: int __ham_60_hashmeta
+ * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp)
+ DB *dbp;
+ char *real_name;
+ u_int32_t flags;
+ DB_FH *fhp;
+ PAGE *h;
+ int *dirtyp;
+{
+ HMETA33 *hmeta;
+
+ COMPQUIET(flags, 0);
+ COMPQUIET(real_name, NULL);
+ COMPQUIET(fhp, NULL);
+ COMPQUIET(dbp, NULL);
+ hmeta = (HMETA33 *)h;
+
+ hmeta->dbmeta.version = 10;
+ *dirtyp = 1;
+
+ return (0);
+}
+
+/*
+ * __ham_60_hash --
+ * Rewrite each H_BLOB data item on hash page h from the HBLOB60 layout to the HBLOB60P1 layout.
+ *
+ * PUBLIC: int __ham_60_hash
+ * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
+ */
+int
+__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp)
+ DB *dbp;
+ char *real_name;
+ u_int32_t flags;
+ DB_FH *fhp;
+ PAGE *h;
+ int *dirtyp;
+{
+ HBLOB60 hb60;
+ HBLOB60P1 hb60p1;
+ HKEYDATA *hk;
+ db_seq_t blob_id, blob_size, file_id, sdb_id;
+ db_indx_t indx;
+ int ret;
+
+ COMPQUIET(flags, 0);
+ COMPQUIET(real_name, NULL);
+ COMPQUIET(fhp, NULL);
+ ret = 0;
+
+ DB_ASSERT(dbp->env, HBLOB60_SIZE == HBLOB_SIZE); /* Items are overwritten in place below. */
+ for (indx = 0; indx < NUM_ENT(h); indx += 2) {
+ hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx);
+ if (HPAGE_PTYPE(hk) == H_BLOB) {
+ memcpy(&hb60, hk, HBLOB60_SIZE);
+ memset(&hb60p1, 0, HBLOB_SIZE);
+ hb60p1.type = hb60.type;
+ hb60p1.encoding = hb60.encoding;
+ GET_BLOB60_ID(dbp->env, hb60, blob_id, ret);
+ if (ret != 0)
+ return (ret);
+ GET_BLOB60_SIZE(dbp->env, hb60, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ GET_BLOB60_FILE_ID(dbp->env, &hb60, file_id, ret);
+ if (ret != 0)
+ return (ret);
+ GET_BLOB60_SDB_ID(dbp->env, &hb60, sdb_id, ret);
+ if (ret != 0)
+ return (ret);
+ SET_BLOB_ID(&hb60p1, blob_id, HBLOB60P1);
+ SET_BLOB_SIZE(&hb60p1, blob_size, HBLOB60P1);
+ SET_BLOB_FILE_ID(&hb60p1, file_id, HBLOB60P1);
+ SET_BLOB_SDB_ID(&hb60p1, sdb_id, HBLOB60P1);
+ memcpy(hk, &hb60p1, HBLOB_SIZE);
+ *dirtyp = 1; /* Set only when an item was actually rewritten. */
+ }
+ }
+
+ return (ret);
+}
diff --git a/src/hash/hash_verify.c b/src/hash/hash_verify.c
index 662e7ac8..302d42d8 100644
--- a/src/hash/hash_verify.c
+++ b/src/hash/hash_verify.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,6 +9,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/blob.h"
#include "dbinc/db_page.h"
#include "dbinc/db_verify.h"
#include "dbinc/btree.h"
@@ -47,6 +48,7 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
int i, ret, t_ret, isbad;
u_int32_t pwr, mbucket;
u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
+ db_seq_t blob_id;
env = dbp->env;
isbad = 0;
@@ -164,6 +166,55 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
}
}
+/*
+ * Check the blob file and subdatabase ids read from the meta page m; where
+ * 64-bit integer support is not available, any non-zero id is an error.
+ */
+ t_ret = 0;
+#ifdef HAVE_64BIT_TYPES
+ GET_BLOB_FILE_ID(env, m, blob_id, t_ret);
+ if (t_ret != 0) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("1178",
+ "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno));
+ if (ret == 0)
+ ret = t_ret;
+ }
+ t_ret = 0;
+ GET_BLOB_SDB_ID(env, m, blob_id, t_ret);
+ if (t_ret != 0) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("1179",
+ "Page %lu: blob subdatabase id overflow.",
+ "%lu"), (u_long)pgno));
+ if (ret == 0)
+ ret = t_ret;
+ }
+#else /* HAVE_64BIT_TYPES */
+ /*
+ * db_seq_t is an int on systems that do not have 64-bit integer types,
+ * so this will compile and run.
+ */
+ GET_BLOB_FILE_ID(env, m, blob_id, t_ret);
+ if (t_ret != 0 || blob_id != 0) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("1203",
+ "Page %lu: blobs require 64 integer compiler support.",
+ "%lu"), (u_long)pgno));
+ if (ret == 0)
+ ret = t_ret;
+ }
+ GET_BLOB_SDB_ID(env, m, blob_id, t_ret);
+ if (t_ret != 0 || blob_id != 0) {
+ isbad = 1;
+ EPRINT((env, DB_STR_A("1204",
+ "Page %lu: blobs require 64 integer compiler support.",
+ "%lu"), (u_long)pgno));
+ if (ret == 0)
+ ret = t_ret; /* Keep the first error seen, as in the checks above. */
+ }
+#endif
+
err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
if (LF_ISSET(DB_SALVAGE) &&
@@ -272,12 +323,15 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
PAGE *h;
u_int32_t i, flags;
{
+ HBLOB hblob;
HOFFDUP hod;
HOFFPAGE hop;
VRFY_CHILDINFO child;
VRFY_PAGEINFO *pip;
db_indx_t offset, len, dlen, elen;
int ret, t_ret;
+ off_t blob_size;
+ db_seq_t blob_id, file_id, sdb_id;
u_int8_t *databuf;
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
@@ -287,6 +341,38 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
case H_KEYDATA:
/* Nothing to do here--everything but the type field is data */
break;
+ case H_BLOB:
+ /*
+ * Blob item. Check that the blob file exists and is the same
+ * file size as is stored in the database record.
+ */
+ memcpy(&hblob, P_ENTRY(dbp, h, i), HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ GET_BLOB_SIZE(dbp->env, hblob, blob_size, ret);
+ if (ret != 0 || blob_size < 0) {
+ EPRINT((dbp->env, DB_STR_A("1181",
+ "Page %lu: blob file size value has overflowed",
+ "%lu"), (u_long)pip->pgno));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+ file_id = (db_seq_t)hblob.file_id;
+ sdb_id = (db_seq_t)hblob.sdb_id;
+ if (file_id == 0 && sdb_id == 0) {
+ EPRINT((dbp->env, DB_STR_A("1184",
+ "Page %lu: invalid blob dir ids %llu %llu at item %lu",
+ "%lu %llu %llu %lu"),
+ (u_long)pip->pgno, (unsigned long long)file_id,
+ (unsigned long long)sdb_id, (u_long)i));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+ if ((ret = __blob_vrfy(dbp->env, blob_id,
+ blob_size, file_id, sdb_id, pip->pgno, flags)) != 0) {
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
+ break;
case H_DUPLICATE:
/* Are we a datum or a key? Better be the former. */
if (i % 2 == 0) {
@@ -822,15 +908,23 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
u_int32_t flags;
{
DBT dbt, key_dbt, unkdbt;
+ ENV *env;
+ HBLOB hblob;
+ char *prefix;
db_pgno_t dpgno;
int ret, err_ret, t_ret;
- u_int32_t himark, i, ovfl_bufsz;
- u_int8_t *hk, *p;
+ off_t blob_size, blob_offset, remaining;
+ u_int32_t blob_buf_size, himark, i, ovfl_bufsz;
+ u_int8_t *blob_buf, *hk, *p;
+ db_seq_t blob_id, file_id, sdb_id;
void *buf, *key_buf;
db_indx_t dlen, len, tlen;
memset(&dbt, 0, sizeof(DBT));
dbt.flags = DB_DBT_REALLOC;
+ blob_buf = NULL;
+ blob_buf_size = 0;
+ env = dbp->env;
DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
@@ -840,9 +934,9 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
* Allocate a buffer for overflow items. Start at one page;
* __db_safe_goff will realloc as needed.
*/
- if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0)
+ if ((ret = __os_malloc(env, dbp->pgsize, &buf)) != 0)
return (ret);
- ovfl_bufsz = dbp->pgsize;
+ ovfl_bufsz = dbp->pgsize;
himark = dbp->pgsize;
for (i = 0;; i++) {
@@ -886,6 +980,70 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
0, " ", handle, callback, 0, 0, vdp)) != 0)
err_ret = ret;
break;
+ case H_BLOB:
+ memcpy(&hblob, hk, HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ GET_BLOB_SIZE(env, hblob, blob_size, ret);
+ if (ret != 0 || blob_size < 0) {
+ err_ret = DB_VERIFY_BAD;
+ continue;
+ }
+ file_id = (db_seq_t)hblob.file_id;
+ sdb_id = (db_seq_t)hblob.sdb_id;
+ /* Read the blob, in buffer-sized pieces if too large. */
+ blob_offset = 0;
+ if (blob_size > MEGABYTE) {
+ if (blob_buf_size < MEGABYTE) {
+ if ((ret = __os_realloc(
+ env, MEGABYTE,
+ &blob_buf)) != 0) {
+ err_ret = ret;
+ continue;
+ }
+ blob_buf_size = MEGABYTE;
+ }
+ } else if (blob_buf_size < blob_size) {
+ if ((ret = __os_realloc(env,
+ (u_int32_t)blob_size, &blob_buf)) != 0) {
+ err_ret = ret;
+ continue;
+ }
+ blob_buf_size = (u_int32_t)blob_size; /* Record the new size only once the buffer has grown. */
+ }
+ dbt.data = blob_buf;
+ dbt.ulen = blob_buf_size;
+ remaining = blob_size;
+ prefix = " ";
+ do {
+ if ((ret = __blob_salvage(env, blob_id,
+ blob_offset,
+ (remaining < blob_buf_size ?
+ (size_t)remaining : blob_buf_size),
+ file_id, sdb_id, &dbt)) != 0) {
+ err_ret = DB_VERIFY_BAD;
+ break;
+ }
+ if (remaining > blob_buf_size)
+ F_SET(
+ vdp, SALVAGE_STREAM_BLOB);
+ else
+ F_CLR(
+ vdp, SALVAGE_STREAM_BLOB);
+ if ((ret = __db_vrfy_prdbt(
+ &dbt, 0, prefix, handle,
+ callback, 0, 0, vdp)) != 0) {
+ err_ret = ret;
+ break;
+ }
+ prefix = NULL;
+ blob_offset += dbt.size;
+ if (remaining < blob_buf_size)
+ remaining = 0;
+ else
+ remaining -= blob_buf_size;
+ } while (remaining > 0);
+ F_CLR(vdp, SALVAGE_STREAM_BLOB);
+ break;
case H_OFFPAGE:
if (len < HOFFPAGE_SIZE) {
err_ret = DB_VERIFY_BAD;
@@ -960,7 +1118,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
*/
memset(&key_dbt, 0, sizeof(key_dbt));
if ((ret = __os_malloc(
- dbp->env, dbt.size, &key_buf)) != 0)
+ env, dbt.size, &key_buf)) != 0)
return (ret);
memcpy(key_buf, buf, dbt.size);
key_dbt.data = key_buf;
@@ -1002,7 +1160,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
handle, callback, 0, 0, vdp)) != 0)
err_ret = ret;
}
- __os_free(dbp->env, key_buf);
+ __os_free(env, key_buf);
break;
default:
if (!LF_ISSET(DB_AGGRESSIVE))
@@ -1013,7 +1171,9 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len);
}
}
- __os_free(dbp->env, buf);
+ if (blob_buf != NULL)
+ __os_free(env, blob_buf);
+ __os_free(env, buf);
if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
return (t_ret);
return ((ret == 0 && err_ret != 0) ? err_ret : ret);
@@ -1129,7 +1289,7 @@ __ham_dups_unsorted(dbp, buf, len)
{
DBT a, b;
db_indx_t offset, dlen;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
memset(&a, 0, sizeof(DBT));
memset(&b, 0, sizeof(DBT));
@@ -1146,7 +1306,7 @@ __ham_dups_unsorted(dbp, buf, len)
b.data = buf + offset + sizeof(db_indx_t);
b.size = dlen;
- if (a.data != NULL && func(dbp, &a, &b) > 0)
+ if (a.data != NULL && func(dbp, &a, &b, NULL) > 0)
return (1);
a.data = b.data;