diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2015-02-17 17:25:57 +0000 |
---|---|---|
committer | <> | 2015-03-17 16:26:24 +0000 |
commit | 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch) | |
tree | 598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/hash | |
parent | 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff) | |
download | berkeleydb-master.tar.gz |
Diffstat (limited to 'src/hash')
-rw-r--r-- | src/hash/hash.c | 178 | ||||
-rw-r--r-- | src/hash/hash.src | 2 | ||||
-rw-r--r-- | src/hash/hash_compact.c | 24 | ||||
-rw-r--r-- | src/hash/hash_conv.c | 9 | ||||
-rw-r--r-- | src/hash/hash_dup.c | 7 | ||||
-rw-r--r-- | src/hash/hash_func.c | 2 | ||||
-rw-r--r-- | src/hash/hash_meta.c | 2 | ||||
-rw-r--r-- | src/hash/hash_method.c | 12 | ||||
-rw-r--r-- | src/hash/hash_open.c | 44 | ||||
-rw-r--r-- | src/hash/hash_page.c | 126 | ||||
-rw-r--r-- | src/hash/hash_rec.c | 8 | ||||
-rw-r--r-- | src/hash/hash_reclaim.c | 2 | ||||
-rw-r--r-- | src/hash/hash_stat.c | 16 | ||||
-rw-r--r-- | src/hash/hash_stub.c | 36 | ||||
-rw-r--r-- | src/hash/hash_upgrade.c | 93 | ||||
-rw-r--r-- | src/hash/hash_verify.c | 180 |
16 files changed, 633 insertions, 108 deletions
diff --git a/src/hash/hash.c b/src/hash/hash.c index ae5736e7..5bff1dee 100644 --- a/src/hash/hash.c +++ b/src/hash/hash.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993, 1994 @@ -298,6 +298,7 @@ __hamc_count(dbc, recnop) } switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { + case H_BLOB: case H_KEYDATA: case H_OFFPAGE: recno = 1; @@ -379,7 +380,7 @@ __hamc_del(dbc, flags) hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_DELETED)) - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); if ((ret = __ham_get_meta(dbc)) != 0) goto out; @@ -535,7 +536,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); case DB_CURRENT: /* cgetchk has already determined that the cursor is set. */ if (F_ISSET(hcp, H_DELETED)) { - ret = DB_KEYEMPTY; + ret = DBC_ERR(dbc, DB_KEYEMPTY); goto err; } @@ -554,7 +555,8 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); if (ret != 0 && ret != DB_NOTFOUND) goto err; else if (F_ISSET(hcp, H_OK)) { - if (*pgnop == PGNO_INVALID) + if (*pgnop == PGNO_INVALID && HPAGE_PTYPE( + H_PAIRDATA(dbp, hcp->page, hcp->indx)) != H_BLOB) ret = __ham_dup_return(dbc, data, flags); break; } else if (!F_ISSET(hcp, H_NOMORE)) { @@ -576,7 +578,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); dbc->thread_info, hcp->page, dbc->priority); hcp->page = NULL; if (hcp->bucket == 0) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); hcp->pgno = PGNO_INVALID; goto err; } @@ -598,7 +600,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); F_CLR(hcp, H_ISDUP); hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); if (hcp->bucket > hcp->hdr->max_bucket) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); hcp->pgno = PGNO_INVALID; goto err; } @@ -612,7 +614,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); 
case DB_SET: case DB_SET_RANGE: /* Key not found. */ - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; case DB_CURRENT: /* @@ -621,7 +623,7 @@ next: ret = __ham_item_next(dbc, lock_type, pgnop); * locking. We return the same error code as we would * if the cursor were deleted. */ - ret = DB_KEYEMPTY; + ret = DBC_ERR(dbc, DB_KEYEMPTY); goto err; default: DB_ASSERT(env, 0); @@ -649,11 +651,14 @@ __ham_bulk(dbc, data, flags) DB *dbp; DB_MPOOLFILE *mpf; HASH_CURSOR *cp; + HBLOB hblob; PAGE *pg; db_indx_t dup_len, dup_off, dup_tlen, indx, *inp; db_lockmode_t lock_mode; db_pgno_t pgno; + off_t blob_size; int32_t *endp, *offp, *saveoff; + db_seq_t blob_id; u_int32_t key_off, key_size, pagesize, size, space; u_int8_t *dbuf, *dp, *hk, *np, *tmp; int is_dup, is_key; @@ -708,6 +713,10 @@ next_pg: space -= key_size; key_off = (u_int32_t)(np - dbuf); np += key_size; + } else if (HPAGE_PTYPE(hk) == H_BLOB) { + __db_errx(dbp->env, DB_STR("1185", + "Blob item key.")); + (void)__env_panic(dbp->env, DB_RUNRECOVERY); } else { if (need_pg) { dp = np; @@ -982,6 +991,38 @@ get_space: np += size; space -= size; break; + case H_BLOB: + space -= (is_key ? 4 : 2) * sizeof(*offp); + if (space > data->ulen) + goto back_up; + + memcpy(&hblob, hk, HBLOB_SIZE); + blob_id = (db_seq_t)hblob.id; + GET_BLOB_SIZE(dbc->env, hblob, blob_size, ret); + if (ret != 0) + return (ret); + if (blob_size > UINT32_MAX) { + size = UINT32_MAX; + goto back_up; + } + size = (u_int32_t)blob_size; + if (size > space) + goto back_up; + + if ((ret = __blob_bulk(dbc, size, blob_id, np)) != 0) + return (ret); + + if (is_key) { + *offp-- = (int32_t)key_off; + *offp-- = (int32_t)key_size; + } + + *offp-- = (int32_t)(np - dbuf); + *offp-- = (int32_t)size; + + np += size; + space -= size; + break; default: /* Do nothing. */ break; @@ -1014,7 +1055,7 @@ get_space: * DBC->get(DB_NEXT) will return DB_NOTFOUND. 
*/ cp->bucket--; - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); } else { /* * Start on the next bucket. @@ -1071,7 +1112,7 @@ __hamc_put(dbc, key, data, flags, pgnop) if (F_ISSET(hcp, H_DELETED) && flags != DB_KEYFIRST && flags != DB_KEYLAST && flags != DB_OVERWRITE_DUP) - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); if ((ret = __ham_get_meta(dbc)) != 0) goto err1; @@ -1083,9 +1124,15 @@ __hamc_put(dbc, key, data, flags, pgnop) case DB_NOOVERWRITE: case DB_OVERWRITE_DUP: nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(key->size)) + - (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(data->size)); + HKEYDATA_PSIZE(key->size)); + if (dbp->blob_threshold && (data->size >= + dbp->blob_threshold || F_ISSET(data, DB_DBT_BLOB))) + nbytes += HBLOB_PSIZE; + else if (ISBIG(hcp, data->size)) + nbytes += HOFFPAGE_PSIZE; + else + nbytes += HKEYDATA_PSIZE(data->size); + if ((ret = __ham_lookup(dbc, key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { if (hcp->seek_found_page != PGNO_INVALID && @@ -1124,7 +1171,7 @@ __hamc_put(dbc, key, data, flags, pgnop) } else if (ret == 0 && flags == DB_NOOVERWRITE && !F_ISSET(hcp, H_DELETED)) { if (*pgnop == PGNO_INVALID) - ret = DB_KEYEXIST; + ret = DBC_ERR(dbc, DB_KEYEXIST); else ret = __bam_opd_exists(dbc, *pgnop); if (ret != 0) @@ -1468,6 +1515,7 @@ __ham_dup_return(dbc, val, flags) type = HPAGE_TYPE(dbp, hcp->page, ndx); pp = hcp->page; myval = val; + cmp = 0; /* * There are 4 cases: @@ -1545,9 +1593,13 @@ __ham_dup_return(dbc, val, flags) memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); if ((ret = __db_moff(dbc, val, pgno, tlen, - dbp->dup_compare, &cmp)) != 0) + dbp->dup_compare, &cmp, NULL)) != 0) return (ret); cmp = -cmp; + } else if (((HKEYDATA *)hk)->type == H_BLOB) { + __db_errx(dbp->env, DB_STR("1186", + "Error - found a blob file in a duplicate data set.")); + (void)__env_panic(dbp->env, DB_RUNRECOVERY); } else { /* * We do not zero tmp_val since the comparison @@ 
-1557,8 +1609,8 @@ __ham_dup_return(dbc, val, flags) tmp_val.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); cmp = dbp->dup_compare == NULL ? - __bam_defcmp(dbp, &tmp_val, val) : - dbp->dup_compare(dbp, &tmp_val, val); + __bam_defcmp(dbp, &tmp_val, val, NULL) : + dbp->dup_compare(dbp, &tmp_val, val, NULL); } if (cmp > 0 && flags == DB_GET_BOTH_RANGE && @@ -1567,7 +1619,7 @@ __ham_dup_return(dbc, val, flags) } if (cmp != 0) - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); } /* @@ -1654,17 +1706,21 @@ __ham_overwrite(dbc, nval, flags) u_int32_t flags; { DB *dbp; - DBT *myval, tmp_val, tmp_val2; + DBT *myval, tmp_val, tmp_val2, old_rec, new_rec; ENV *env; HASH_CURSOR *hcp; + HBLOB hblob; void *newrec; u_int8_t *hk, *p; u_int32_t len, nondup_size; + db_seq_t blob_id, new_blob_id; db_indx_t newsize; + off_t blob_size; int ret; dbp = dbc->dbp; env = dbp->env; + ret = 0; hcp = (HASH_CURSOR *)dbc->internal; if (F_ISSET(hcp, H_ISDUP)) { /* @@ -1717,7 +1773,7 @@ __ham_overwrite(dbc, nval, flags) NULL, nval, flags, NULL)); } - if ((ret = __os_malloc(dbp->env, + if ((ret = __os_malloc(env, DUP_SIZE(newsize), &newrec)) != 0) return (ret); memset(&tmp_val2, 0, sizeof(tmp_val2)); @@ -1765,7 +1821,7 @@ __ham_overwrite(dbc, nval, flags) (u_int8_t *)newrec + sizeof(db_indx_t); tmp_val2.size = newsize; if (dbp->dup_compare( - dbp, &tmp_val, &tmp_val2) != 0) { + dbp, &tmp_val, &tmp_val2, NULL) != 0) { __os_free(env, newrec); return (__db_duperr(dbp, flags)); } @@ -1816,7 +1872,7 @@ __ham_overwrite(dbc, nval, flags) sizeof(db_indx_t); tmp_val2.size = hcp->dup_len; if (dbp->dup_compare( - dbp, nval, &tmp_val2) != 0) { + dbp, nval, &tmp_val2, NULL) != 0) { __db_errx(env, DB_STR("1131", "Existing data sorts differently from put data")); return (EINVAL); @@ -1848,16 +1904,84 @@ __ham_overwrite(dbc, nval, flags) hcp->dup_len = (db_indx_t)nval->size; } myval = &tmp_val; + goto end; + } + hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); + if (HPAGE_PTYPE(hk) == H_BLOB) { + 
memcpy(&hblob, hk, HBLOB_SIZE); + memset(&old_rec, 0, sizeof(DBT)); + memset(&new_rec, 0, sizeof(DBT)); + if (DBC_LOGGING(dbc)) { + new_rec.data = HKEYDATA_DATA(&hblob); + if ((ret = __os_malloc( + env, HBLOB_SIZE, &old_rec.data)) != 0) + return (ret); + memcpy(old_rec.data, + HKEYDATA_DATA(&hblob), HBLOB_DSIZE); + new_rec.size = old_rec.size = HBLOB_DSIZE; + } + /* + * Inserting a blob record instead of blob data, only + * used internally by the DB_STREAM api. + */ + if (F_ISSET(nval, DB_DBT_BLOB_REC)) { + DB_ASSERT(env, nval->size == HBLOB_SIZE); + DB_ASSERT(env, HPAGE_PTYPE(nval->data) == H_BLOB); + memcpy(&hblob, nval->data, nval->size); + } else { + /* + * A blob file overwrite is simpler than other + * replace operations. It's simply a matter + * deleting the old blob file, and creating a + * new one. We may need to be careful of + * cursors when we have support for blob + * cursors. + * That means that we can skip the replpair + * call. + */ + blob_id = (db_seq_t)hblob.id; + GET_BLOB_SIZE(env, hblob, blob_size, ret); + if (ret != 0) + return (ret); + if ((ret = __blob_repl(dbc, + nval, blob_id, &new_blob_id, &blob_size)) == 0) { + SET_BLOB_ID(&hblob, new_blob_id, HBLOB); + SET_BLOB_SIZE(&hblob, blob_size, HBLOB); + } + } + if (ret == 0) { + if (DBC_LOGGING(dbc)) { + if ((ret = __ham_replace_log(dbp, + dbc->txn, &LSN(hcp->page), 0, + PGNO(hcp->page), + (u_int32_t)H_DATAINDEX(hcp->indx), + &LSN(hcp->page), 0, + OP_SET(H_BLOB, hcp->page), &old_rec, + OP_SET(H_BLOB, hcp->page), + &new_rec)) != 0) { + memcpy(HKEYDATA_DATA(&hblob), + old_rec.data, HBLOB_DSIZE); + __os_free(env, old_rec.data); + return (ret); + } + + } else + LSN_NOT_LOGGED(LSN(hcp->page)); + } + /* Copy the updated blob data back to the page. 
*/ + memcpy(hk, &hblob, HBLOB_SIZE); + if (old_rec.data != NULL) + __os_free(env, old_rec.data); + return (ret); } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { /* Put/overwrite */ memcpy(&tmp_val, nval, sizeof(*nval)); F_SET(&tmp_val, DB_DBT_PARTIAL); tmp_val.doff = 0; - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) + if (HPAGE_PTYPE(hk) == H_OFFPAGE) { memcpy(&tmp_val.dlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - else + } else tmp_val.dlen = LEN_HDATA(dbp, hcp->page, hcp->hdr->dbmeta.pagesize, hcp->indx); myval = &tmp_val; @@ -1865,7 +1989,7 @@ __ham_overwrite(dbc, nval, flags) /* Regular partial put */ myval = nval; - return (__ham_replpair(dbc, myval, +end: return (__ham_replpair(dbc, myval, F_ISSET(hcp, H_ISDUP) ? H_DUPLICATE : H_KEYDATA)); } @@ -1955,7 +2079,7 @@ __ham_lookup(dbc, key, sought, mode, pgnop) return (ret); } F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); } /* diff --git a/src/hash/hash.src b/src/hash/hash.src index e544c6f3..f56a9c5b 100644 --- a/src/hash/hash.src +++ b/src/hash/hash.src @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/hash/hash_compact.c b/src/hash/hash_compact.c index 83b5ffb1..79fb6004 100644 --- a/src/hash/hash_compact.c +++ b/src/hash/hash_compact.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. 
* $Id$ */ @@ -118,7 +118,8 @@ __ham_compact_int(dbc, start, stop, factor, c_data, donep, flags) break; origpgno = pgno; if ((ret = __db_truncate_root(dbc, hcp->page, - H_DATAINDEX(hcp->indx), &pgno, 0)) != 0) + H_DATAINDEX(hcp->indx), + &pgno, 0, &pgs_done)) != 0) break; if (pgno != origpgno) { memcpy(HOFFDUP_PGNO(H_PAIRDATA(dbp, @@ -247,7 +248,7 @@ __ham_compact_bucket(dbc, c_data, pgs_donep) if (check_trunc && PREV_PGNO(pg) != PGNO_INVALID && PGNO(pg) > c_data->compact_truncate && (ret = __db_exchange_page(dbc, &pg, - hcp->page, PGNO_INVALID, DB_EXCH_FREE)) != 0) + hcp->page, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0) break; if (pgno != PGNO(pg)) (*pgs_donep)++; @@ -400,8 +401,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done) if ((ret = __memp_dirty(dbp->mpf, &hcp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) return (ret); - if ((ret = - __db_truncate_root(dbc, hcp->page, indx, &pgno, 0)) != 0) + if ((ret = __db_truncate_root(dbc, + hcp->page, indx, &pgno, 0, pgs_done)) != 0) return (ret); if (pgno != origpgno) { memcpy(HOFFPAGE_PGNO(P_ENTRY(dbp, hcp->page, indx)), @@ -410,7 +411,8 @@ __ham_truncate_overflow(dbc, indx, c_data, pgs_done) c_data->compact_pages--; } } - if ((ret = __db_truncate_overflow(dbc, pgno, NULL, c_data)) != 0) + if ((ret = + __db_truncate_overflow(dbc, pgno, NULL, c_data, pgs_done)) != 0) return (ret); return (0); } @@ -434,10 +436,11 @@ __ham_compact_hash(dbp, ip, txn, c_data) HMETA *meta; PAGE *oldpage; db_pgno_t free_pgno, last_pgno, pgno, start_pgno; - int flags, local_txn, ret, t_ret; + int flags, local_txn, pgs_done, ret, t_ret; u_int32_t bucket, i, size; local_txn = IS_DB_AUTO_COMMIT(dbp, txn); + pgs_done = 0; oldpage = NULL; dbc = NULL; LOCK_INIT(lock); @@ -506,8 +509,8 @@ __ham_compact_hash(dbp, ip, txn, c_data) flags = 0; else flags = DB_EXCH_FREE; - if ((ret = __db_exchange_page(dbc, - &oldpage, NULL, free_pgno, flags)) != 0) + if ((ret = __db_exchange_page(dbc, &oldpage, + NULL, free_pgno, flags, 
&pgs_done)) != 0) goto err; } else if (pgno >= last_pgno) { if ((ret = __db_free(dbc, oldpage, 0)) != 0) @@ -526,7 +529,8 @@ __ham_compact_hash(dbp, ip, txn, c_data) } if (ret == 0 && F_ISSET(dbp, DB_AM_SUBDB) && PGNO(hcp->hdr) > c_data->compact_truncate) - ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr, c_data); + ret = __db_move_metadata(dbc, (DBMETA**)&hcp->hdr, + c_data, &pgs_done); err: if (oldpage != NULL && (t_ret = __memp_fput(dbp->mpf, dbc->thread_info, oldpage, dbc->priority)) != 0 && ret == 0) diff --git a/src/hash/hash_conv.c b/src/hash/hash_conv.c index fa084f2a..7a53a037 100644 --- a/src/hash/hash_conv.c +++ b/src/hash/hash_conv.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -104,7 +104,12 @@ __ham_mswap(env, pg) SWAP32(p); /* h_charkey */ for (i = 0; i < NCACHED; ++i) SWAP32(p); /* spares */ - p += 59 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* threshold */ + SWAP32(p); /* file id lo */ + SWAP32(p); /* file id hi */ + SWAP32(p); /* sdb id lo */ + SWAP32(p); /* sdb id hi */ + p += 54 * sizeof(u_int32_t); /* unused */ SWAP32(p); /* crypto_magic */ return (0); } diff --git a/src/hash/hash_dup.c b/src/hash/hash_dup.c index 879c33d7..523d7227 100644 --- a/src/hash/hash_dup.c +++ b/src/hash/hash_dup.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. 
*/ /* * Copyright (c) 1990, 1993, 1994 @@ -368,6 +368,7 @@ finish: if (ret == 0) { off += len + 2 * sizeof(db_indx_t); } break; + case H_BLOB: default: ret = __db_pgfmt(env, hcp->pgno); break; @@ -772,7 +773,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags) DBT cur; HASH_CURSOR *hcp; db_indx_t i, len; - int (*func) __P((DB *, const DBT *, const DBT *)); + int (*func) __P((DB *, const DBT *, const DBT *, size_t *)); u_int8_t *data; dbp = dbc->dbp; @@ -794,7 +795,7 @@ __ham_dsearch(dbc, dbt, offp, cmpp, flags) * we're done. In the latter case, if permitting partial * matches, it's not a failure. */ - *cmpp = func(dbp, dbt, &cur); + *cmpp = func(dbp, dbt, &cur, NULL); if (*cmpp == 0) break; if (*cmpp < 0 && dbp->dup_compare != NULL) { diff --git a/src/hash/hash_func.c b/src/hash/hash_func.c index baf6061c..1e83b00a 100644 --- a/src/hash/hash_func.c +++ b/src/hash/hash_func.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993 diff --git a/src/hash/hash_meta.c b/src/hash/hash_meta.c index d9a35cb4..aefdffb8 100644 --- a/src/hash/hash_meta.c +++ b/src/hash/hash_meta.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/hash/hash_method.c b/src/hash/hash_method.c index 1da81e70..a05bcea6 100644 --- a/src/hash/hash_method.c +++ b/src/hash/hash_method.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved. 
* * $Id$ */ @@ -20,7 +20,7 @@ static int __ham_set_h_hash static int __ham_set_h_nelem __P((DB *, u_int32_t)); static int __ham_get_h_compare - __P((DB *, int (**)(DB *, const DBT *, const DBT *))); + __P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *))); /* * __ham_db_create -- @@ -153,7 +153,7 @@ __ham_set_h_hash(dbp, func) static int __ham_get_h_compare(dbp, funcp) DB *dbp; - int (**funcp) __P((DB *, const DBT *, const DBT *)); + int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *)); { HASH *t; @@ -170,13 +170,13 @@ __ham_get_h_compare(dbp, funcp) * __ham_set_h_compare -- * Set the comparison function. * - * PUBLIC: int __ham_set_h_compare - * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *))); + * PUBLIC: int __ham_set_h_compare __P((DB *, + * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *))); */ int __ham_set_h_compare(dbp, func) DB *dbp; - int (*func) __P((DB *, const DBT *, const DBT *)); + int (*func) __P((DB *, const DBT *, const DBT *, size_t *)); { HASH *t; diff --git a/src/hash/hash_open.c b/src/hash/hash_open.c index 3d0bb220..0104a57f 100644 --- a/src/hash/hash_open.c +++ b/src/hash/hash_open.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993, 1994 @@ -44,6 +44,7 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/crypto.h" #include "dbinc/db_page.h" #include "dbinc/hash.h" @@ -149,6 +150,7 @@ __ham_metachk(dbp, name, hashm) int ret; env = dbp->env; + ret = 0; /* * At this point, all we know is that the magic number is for a Hash. @@ -168,6 +170,7 @@ __ham_metachk(dbp, name, hashm) case 7: case 8: case 9: + case 10: break; default: __db_errx(env, DB_STR_A("1126", @@ -230,6 +233,29 @@ __ham_metachk(dbp, name, hashm) /* Set the page size. 
*/ dbp->pgsize = hashm->dbmeta.pagesize; + dbp->blob_threshold = hashm->blob_threshold; + GET_BLOB_FILE_ID(env, hashm, dbp->blob_file_id, ret); + if (ret != 0) + return (ret); + GET_BLOB_SDB_ID(env, hashm, dbp->blob_sdb_id, ret); + if (ret != 0) + return (ret); + /* Blob databases must be upgraded. */ + if (vers == 9 && (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0)) { + __db_errx(env, DB_STR_A("1208", +"%s: databases that support blobs must be upgraded.", "%s"), + name); + return (EINVAL); + } +#ifndef HAVE_64BIT_TYPES + if (dbp->blob_file_id != 0 || dbp->blob_sdb_id != 0) { + __db_errx(env, DB_STR_A("1202", + "%s: blobs require 64 integer compiler support.", "%s"), + name); + return (EINVAL); + } +#endif + /* Copy the file's ID. */ memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); @@ -297,6 +323,9 @@ __ham_init_meta(dbp, meta, pgno, lsnp) meta->nelem = hashp->h_nelem; meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + meta->blob_threshold = dbp->blob_threshold; + SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, HMETA); + SET_BLOB_META_SDB_ID(meta, dbp->blob_sdb_id, HMETA); if (F_ISSET(dbp, DB_AM_DUP)) F_SET(&meta->dbmeta, DB_HASH_DUP); @@ -414,6 +443,12 @@ __ham_new_file(dbp, ip, txn, fhp, name) F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); pdbt.data = &pginfo; pdbt.size = sizeof(pginfo); + if (dbp->blob_threshold) { + if ((ret = __blob_generate_dir_ids( + dbp, txn, &dbp->blob_file_id)) != 0) + return (ret); + + } if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0) return (ret); meta = (HMETA *)buf; @@ -491,6 +526,13 @@ __ham_new_subdb(mdbp, dbp, ip, txn) LOCK_INIT(metalock); LOCK_INIT(mmlock); + if (dbp->blob_threshold) { + if ((ret = __blob_generate_dir_ids( + dbp, txn, &dbp->blob_sdb_id)) != 0) + return (ret); + + } + if ((ret = __db_cursor(mdbp, ip, txn, &dbc, CDB_LOCKING(env) ? 
DB_WRITECURSOR : 0)) != 0) return (ret); diff --git a/src/hash/hash_page.c b/src/hash/hash_page.c index 7576fe61..8e0f897d 100644 --- a/src/hash/hash_page.c +++ b/src/hash/hash_page.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993, 1994 @@ -129,7 +129,7 @@ recheck: /* Fetch next page. */ if (NEXT_PGNO(hcp->page) == PGNO_INVALID) { F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); } next_pgno = NEXT_PGNO(hcp->page); hcp->indx = 0; @@ -344,7 +344,7 @@ __ham_item_prev(dbc, mode, pgnop) if (hcp->pgno == PGNO_INVALID) { /* Beginning of bucket. */ F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); } else if ((ret = __ham_next_cpage(dbc, hcp->pgno)) != 0) return (ret); @@ -371,7 +371,7 @@ __ham_item_prev(dbc, mode, pgnop) if (hcp->indx == 0) { /* Bucket was empty. */ F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); + return (DBC_ERR(dbc, DB_NOTFOUND)); } } @@ -497,7 +497,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type) inp = P_INP(dbp, p); ksize = (key_type == H_OFFPAGE) ? key_dbt->size : HKEYDATA_SIZE(key_dbt->size); - dsize = (data_type == H_OFFPAGE || data_type == H_OFFDUP) ? + dsize = (data_type == H_OFFPAGE || + data_type == H_OFFDUP || data_type == H_BLOB) ? 
data_dbt->size : HKEYDATA_SIZE(data_dbt->size); increase = ksize + dsize; @@ -579,7 +580,8 @@ __ham_insertpair(dbc, p, indxp, key_dbt, data_dbt, key_type, data_type) else PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data, key_dbt->size, key_type); - if (data_type == H_OFFPAGE || data_type == H_OFFDUP) + if (data_type == H_BLOB || + data_type == H_OFFPAGE || data_type == H_OFFDUP) memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data, data_dbt->size); else @@ -618,6 +620,8 @@ __ham_getindex(dbc, p, key, key_type, match, indx) { /* Since all entries are key/data pairs. */ DB_ASSERT(dbc->env, NUM_ENT(p)%2 == 0 ); + /* Blob files can only be stored as data items. */ + DB_ASSERT(dbc->env, key_type != H_BLOB ); /* Support pre 4.6 unsorted hash pages. */ if (p->type == P_HASH_UNSORTED) @@ -672,7 +676,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx) memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); if ((ret = __db_moff(dbc, key, pgno, tlen, - t->h_compare, &res)) != 0) + t->h_compare, &res, NULL)) != 0) return (ret); } break; @@ -681,7 +685,7 @@ __ham_getindex_unsorted(dbc, p, key, match, indx) DB_INIT_DBT(pg_dbt, HKEYDATA_DATA(hk), key->size); if (t->h_compare( - dbp, key, &pg_dbt) != 0) + dbp, key, &pg_dbt, NULL) != 0) break; } else if (key->size == LEN_HKEY(dbp, p, dbp->pgsize, i)) @@ -784,7 +788,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp) (void)__ua_memcpy(&off_pgno, HOFFPAGE_PGNO(offp), sizeof(db_pgno_t)); if ((ret = __db_moff(dbc, key, off_pgno, - itemlen, t->h_compare, &res)) != 0) + itemlen, t->h_compare, &res, NULL)) != 0) return (ret); } } else { @@ -799,7 +803,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, match, indxp) (void)__ua_memcpy(&off_len, HOFFPAGE_TLEN(offp), sizeof(u_int32_t)); if ((ret = __db_moff(dbc, &tmp_dbt, off_pgno, - off_len, t->h_compare, &res)) != 0) + off_len, t->h_compare, &res, NULL)) != 0) return (ret); /* * Since we switched the key/match parameters @@ -810,7 +814,7 @@ __ham_getindex_sorted(dbc, p, key, key_type, 
match, indxp) } else if (t->h_compare != NULL) { /* Case 4, with a user comparison func */ DB_INIT_DBT(tmp_dbt, data, itemlen); - res = t->h_compare(dbp, key, &tmp_dbt); + res = t->h_compare(dbp, key, &tmp_dbt, NULL); } else { /* Case 4, without a user comparison func */ if ((res = memcmp(key->data, data, @@ -899,8 +903,8 @@ __ham_verify_sorted_page (dbc, p) sizeof(u_int32_t)); memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i-2)), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbc, - &curr_dbt, tpgno, tlen, t->h_compare, &res)) != 0) + if ((ret = __db_moff(dbc, &curr_dbt, + tpgno, tlen, t->h_compare, &res, NULL)) != 0) return (ret); } else if (HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) { memset(&prev_dbt, 0, sizeof(prev_dbt)); @@ -910,8 +914,8 @@ __ham_verify_sorted_page (dbc, p) sizeof(u_int32_t)); memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i)), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbc, - &prev_dbt, tpgno, tlen, t->h_compare, &res)) != 0) + if ((ret = __db_moff(dbc, &prev_dbt, tpgno, tlen, + t->h_compare, &res, NULL)) != 0) return (ret); } else res = memcmp(prev, curr, min(curr_len, prev_len)); @@ -1047,9 +1051,11 @@ __ham_del_pair(dbc, flags, ppg) DBT data_dbt, key_dbt; DB_LSN new_lsn, *n_lsn, tmp_lsn; DB_MPOOLFILE *mpf; + HBLOB hblob; HASH_CURSOR *hcp; PAGE *n_pagep, *nn_pagep, *p, *p_pagep; db_ham_mode op; + db_seq_t blob_id; db_indx_t ndx; db_pgno_t chg_pgno, pgno, tmp_pgno; u_int32_t data_type, key_type, order; @@ -1067,6 +1073,8 @@ __ham_del_pair(dbc, flags, ppg) DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &hcp->page)) != 0) return (ret); p = hcp->page; + key_type = HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)); + data_type = HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx)); /* * We optimize for the normal case which is when neither the key nor @@ -1075,8 +1083,7 @@ __ham_del_pair(dbc, flags, ppg) * to remove the big item and then update the page to remove the * entry referring to the big item. 
*/ - if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && - HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) { + if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && key_type == H_OFFPAGE) { memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))), sizeof(db_pgno_t)); ret = __db_doff(dbc, pgno); @@ -1084,7 +1091,13 @@ __ham_del_pair(dbc, flags, ppg) ret = 0; if (!LF_ISSET(HAM_DEL_IGNORE_OFFPAGE) && ret == 0) - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) { + switch (data_type) { + case H_BLOB: + memcpy(&hblob, + P_ENTRY(dbp, p, H_DATAINDEX(ndx)), HBLOB_SIZE); + blob_id = (db_seq_t)hblob.id; + ret = __blob_del(dbc, blob_id); + break; case H_OFFPAGE: memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))), @@ -1111,7 +1124,7 @@ __ham_del_pair(dbc, flags, ppg) /* Now log the delete off this page. */ if (DBC_LOGGING(dbc)) { hk = H_PAIRKEY(dbp, hcp->page, ndx); - if ((key_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) { + if (key_type == H_OFFPAGE) { key_dbt.data = hk; key_dbt.size = HOFFPAGE_SIZE; } else { @@ -1120,9 +1133,12 @@ __ham_del_pair(dbc, flags, ppg) LEN_HKEY(dbp, hcp->page, dbp->pgsize, ndx); } hk = H_PAIRDATA(dbp, hcp->page, ndx); - if ((data_type = HPAGE_PTYPE(hk)) == H_OFFPAGE) { + if (data_type == H_OFFPAGE) { data_dbt.data = hk; data_dbt.size = HOFFPAGE_SIZE; + } else if (data_type == H_BLOB) { + data_dbt.data = hk; + data_dbt.size = HBLOB_SIZE; } else if (data_type == H_OFFDUP) { data_dbt.data = hk; data_dbt.size = HOFFDUP_SIZE; @@ -1404,6 +1420,8 @@ __ham_replpair(dbc, dbt, newtype) * unless it is an append, when we extend the offpage item, and * update the HOFFPAGE item on the current page to have the new size * via a delete/add. + * + * Updating a record won't cause it to become a blob file or vice versa. 
*/ dbp = dbc->dbp; env = dbp->env; @@ -2464,15 +2482,18 @@ __ham_add_el(dbc, key, val, type) const DBT *pkey, *pdata; DB *dbp; DBT key_dbt, data_dbt; - DB_LSN new_lsn; + DB_LSN blob_lsn, new_lsn; DB_MPOOLFILE *mpf; HASH_CURSOR *hcp; HOFFPAGE doff, koff; + HBLOB dblob; PAGE *new_pagep; db_pgno_t next_pgno, pgno; + off_t file_size; + db_seq_t blob_id; u_int32_t data_size, data_type, key_size, key_type; u_int32_t pages, pagespace, pairsize; - int do_expand, is_keybig, is_databig, match, ret; + int do_expand, is_keybig, match, ret; dbp = dbc->dbp; mpf = dbp->mpf; @@ -2485,14 +2506,33 @@ __ham_add_el(dbc, key, val, type) dbc->thread_info, dbc->txn, DB_MPOOL_CREATE, &hcp->page)) != 0) return (ret); + /* + * Key is either: + * - On page + * - On overflow page(s) + */ key_size = HKEYDATA_PSIZE(key->size); - data_size = HKEYDATA_PSIZE(val->size); is_keybig = ISBIG(hcp, key->size); - is_databig = ISBIG(hcp, val->size); if (is_keybig) key_size = HOFFPAGE_PSIZE; - if (is_databig) + /* + * Data is either: + * - On page (H_KEYDATA or H_DUPLICATE) + * - On overflow page(s) + * - In a blob file + */ + data_type = + (dbp->blob_threshold && (val->size >= dbp->blob_threshold || + F_ISSET(val, DB_DBT_BLOB))) ? + H_BLOB : (ISBIG(hcp, val->size)) ? H_OFFPAGE : H_KEYDATA; + if (data_type == H_KEYDATA || data_type == H_DUPLICATE) + data_size = HKEYDATA_PSIZE(val->size); + else if (data_type == H_OFFPAGE) data_size = HOFFPAGE_PSIZE; + else { /* H_BLOB */ + DB_ASSERT(dbp->env, data_type == H_BLOB); + data_size = HBLOB_PSIZE; + } pairsize = key_size + data_size; @@ -2536,17 +2576,17 @@ __ham_add_el(dbc, key, val, type) * run out of file space before updating the key or data. 
*/ if (dbc->txn == NULL && - dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) { + dbp->mpf->mfp->maxpgno != 0 && + (is_keybig || data_type == H_OFFPAGE)) { pagespace = P_MAXSPACE(dbp, dbp->pgsize); pages = 0; - if (is_databig) + if (data_type == H_OFFPAGE) pages = ((data_size - 1) / pagespace) + 1; - if (is_keybig) { + if (is_keybig) pages += ((key->size - 1) / pagespace) + 1; - if (pages > - (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno)) - return (__db_space_err(dbp)); - } + if (pages > + (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno)) + return (__db_space_err(dbp)); } if ((ret = __memp_dirty(mpf, @@ -2575,7 +2615,7 @@ __ham_add_el(dbc, key, val, type) key_type = H_KEYDATA; } - if (is_databig) { + if (data_type == H_OFFPAGE) { doff.type = H_OFFPAGE; UMRW_SET(doff.unused[0]); UMRW_SET(doff.unused[1]); @@ -2587,6 +2627,22 @@ __ham_add_el(dbc, key, val, type) data_dbt.size = sizeof(doff); pdata = &data_dbt; data_type = H_OFFPAGE; + } else if (data_type == H_BLOB) { + memset(&dblob, 0, HBLOB_SIZE); + dblob.type = H_BLOB; + blob_id = 0; + file_size = 0; + if ((ret = __blob_put( + dbc, (DBT *)val, &blob_id, &file_size, &blob_lsn)) != 0) + return (ret); + SET_BLOB_ID(&dblob, blob_id, HBLOB); + SET_BLOB_SIZE(&dblob, file_size, HBLOB); + SET_BLOB_FILE_ID(&dblob, dbp->blob_file_id, HBLOB); + SET_BLOB_SDB_ID(&dblob, dbp->blob_sdb_id, HBLOB); + data_dbt.data = &dblob; + data_dbt.size = sizeof(dblob); + pdata = &data_dbt; + data_type = H_BLOB; } else { pdata = val; data_type = type; @@ -2673,7 +2729,7 @@ __ham_add_el(dbc, key, val, type) /* * Special insert pair call -- copies a key/data pair from one page to * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, - * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we + * H_DUPLICATE, H_OFFDUP, H_BLOB). Since we log splits at a high level, we * do not need to log them here. 
* * dest_indx is an optional parameter, it serves several purposes: @@ -2715,7 +2771,7 @@ __ham_copypair(dbc, src_page, src_ndx, dest_page, dest_indx, log) tkey.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, kindx)); tkey.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, kindx); } - if (dtype == H_OFFPAGE || dtype == H_OFFDUP) { + if (dtype == H_OFFPAGE || dtype == H_OFFDUP || dtype == H_BLOB) { tdata.data = P_ENTRY(dbp, src_page, dindx); tdata.size = LEN_HITEM(dbp, src_page, dbp->pgsize, dindx); } else { diff --git a/src/hash/hash_rec.c b/src/hash/hash_rec.c index 58965569..8a39d880 100644 --- a/src/hash/hash_rec.c +++ b/src/hash/hash_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1995, 1996 @@ -232,6 +232,7 @@ __ham_insdel_42_recover(env, dbtp, lsnp, op, info) REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? H_OFFPAGE : H_KEYDATA; + /* TODO: May need a PAIR_ISDATABLOB here. */ if (PAIR_ISDATADUP(argp->opcode)) dtype = H_DUPLICATE; else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode)) @@ -957,9 +958,8 @@ __ham_metagroup_recover(env, dbtp, lsnp, op, info) if (IS_ZERO_LSN(LSN(pagep))) { REC_DIRTY(mpf, ip, dbc->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - PGNO_INVALID, PGNO_INVALID, PGNO_INVALID, - 0, P_HASH); + P_INIT(pagep, file_dbp->pgsize, pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); } if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) diff --git a/src/hash/hash_reclaim.c b/src/hash/hash_reclaim.c index ce3f6d9e..55980444 100644 --- a/src/hash/hash_reclaim.c +++ b/src/hash/hash_reclaim.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/hash/hash_stat.c b/src/hash/hash_stat.c index 683ce5a6..7ccf472d 100644 --- a/src/hash/hash_stat.c +++ b/src/hash/hash_stat.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -188,15 +188,19 @@ __ham_stat_print(dbc, flags) sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff"); __db_dl(env, - "Number of overflow pages", (u_long)sp->hash_bigpages); - __db_dl_pct(env, "Number of bytes free in overflow pages", + "Number of blobs", (u_long)sp->hash_nblobs); + __db_dl(env, + "Number of hash overflow (big item) pages", + (u_long)sp->hash_bigpages); + __db_dl_pct(env, + "Number of bytes free in hash overflow (big item) pages", (u_long)sp->hash_big_bfree, DB_PCT_PG( sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff"); __db_dl(env, "Number of bucket overflow pages", (u_long)sp->hash_overflows); __db_dl_pct(env, - "Number of bytes free in bucket overflow pages", + "Number of bytes free on bucket overflow pages", (u_long)sp->hash_ovfl_free, DB_PCT_PG( sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff"); @@ -258,6 +262,9 @@ __ham_stat_callback(dbc, pagep, cookie, putp) switch (*H_PAIRDATA(dbp, pagep, indx)) { case H_OFFDUP: break; + case H_BLOB: + sp->hash_nblobs++; + /* fall through */ case H_OFFPAGE: case H_KEYDATA: sp->hash_ndata++; @@ -480,6 +487,7 @@ __ham_traverse(dbc, mode, callback, cookie, look_past_max) opgno, callback, cookie)) != 0) goto err; break; + case H_BLOB: case H_KEYDATA: case H_DUPLICATE: break; diff --git a/src/hash/hash_stub.c b/src/hash/hash_stub.c index 57337ea9..89307670 100644 --- a/src/hash/hash_stub.c +++ b/src/hash/hash_stub.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -127,6 +127,40 @@ __ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) } int +__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int +__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_hash_am(dbp->env)); +} + +int __hamc_cmp(dbc, other_dbc, result) DBC *dbc, *other_dbc; int *result; diff --git a/src/hash/hash_upgrade.c b/src/hash/hash_upgrade.c index f66a7a58..17014a5c 100644 --- a/src/hash/hash_upgrade.c +++ b/src/hash/hash_upgrade.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -9,6 +9,7 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/db_page.h" #include "dbinc/hash.h" #include "dbinc/db_upgrade.h" @@ -321,3 +322,93 @@ __ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp) return (ret); } + +/* + * __ham_60_hashmeta-- + * Upgrade the version number. 
+ * + * PUBLIC: int __ham_60_hashmeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_60_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HMETA33 *hmeta; + + COMPQUIET(flags, 0); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(dbp, NULL); + hmeta = (HMETA33 *)h; + + hmeta->dbmeta.version = 10; + *dirtyp = 1; + + return (0); +} + +/* + * __ham_60_hash -- + * Upgrade the blob records on the database hash leaf pages. + * + * PUBLIC: int __ham_60_hash + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__ham_60_hash(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HBLOB60 hb60; + HBLOB60P1 hb60p1; + HKEYDATA *hk; + db_seq_t blob_id, blob_size, file_id, sdb_id; + db_indx_t indx; + int ret; + + COMPQUIET(flags, 0); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + ret = 0; + + DB_ASSERT(dbp->env, HBLOB60_SIZE == HBLOB_SIZE); + for (indx = 0; indx < NUM_ENT(h); indx += 2) { + hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx); + if (HPAGE_PTYPE(hk) == H_BLOB) { + memcpy(&hb60, hk, HBLOB60_SIZE); + memset(&hb60p1, 0, HBLOB_SIZE); + hb60p1.type = hb60.type; + hb60p1.encoding = hb60.encoding; + GET_BLOB60_ID(dbp->env, hb60, blob_id, ret); + if (ret != 0) + return (ret); + GET_BLOB60_SIZE(dbp->env, hb60, blob_size, ret); + if (ret != 0) + return (ret); + GET_BLOB60_FILE_ID(dbp->env, &hb60, file_id, ret); + if (ret != 0) + return (ret); + GET_BLOB60_SDB_ID(dbp->env, &hb60, sdb_id, ret); + if (ret != 0) + return (ret); + SET_BLOB_ID(&hb60p1, blob_id, HBLOB60P1); + SET_BLOB_SIZE(&hb60p1, blob_size, HBLOB60P1); + SET_BLOB_FILE_ID(&hb60p1, file_id, HBLOB60P1); + SET_BLOB_SDB_ID(&hb60p1, sdb_id, HBLOB60P1); + memcpy(hk, &hb60p1, HBLOB_SIZE); + *dirtyp = 1; + } + } + + return (ret); +} diff --git a/src/hash/hash_verify.c 
b/src/hash/hash_verify.c index 662e7ac8..302d42d8 100644 --- a/src/hash/hash_verify.c +++ b/src/hash/hash_verify.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -9,6 +9,7 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/db_page.h" #include "dbinc/db_verify.h" #include "dbinc/btree.h" @@ -47,6 +48,7 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) int i, ret, t_ret, isbad; u_int32_t pwr, mbucket; u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); + db_seq_t blob_id; env = dbp->env; isbad = 0; @@ -164,6 +166,55 @@ __ham_vrfy_meta(dbp, vdp, m, pgno, flags) } } +/* + * Where 64-bit integer support is not available, + * return an error if the file has any blobs. + */ + t_ret = 0; +#ifdef HAVE_64BIT_TYPES + GET_BLOB_FILE_ID(env, m, blob_id, t_ret); + if (t_ret != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1178", + "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } + t_ret = 0; + GET_BLOB_SDB_ID(env, m, blob_id, t_ret); + if (t_ret != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1179", + "Page %lu: blob subdatabase id overflow.", + "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } +#else /* HAVE_64BIT_TYPES */ + /* + * db_seq_t is an int on systems that do not have 64 integer types, so + * this will compile and run. 
+ */ + GET_BLOB_FILE_ID(env, m, blob_id, t_ret); + if (t_ret != 0 || blob_id != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1203", + "Page %lu: blobs require 64 integer compiler support.", + "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } + GET_BLOB_SDB_ID(env, m, blob_id, t_ret); + if (t_ret != 0 || blob_id != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1204", + "Page %lu: blobs require 64 integer compiler support.", + "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } +#endif + err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) ret = t_ret; if (LF_ISSET(DB_SALVAGE) && @@ -272,12 +323,15 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) PAGE *h; u_int32_t i, flags; { + HBLOB hblob; HOFFDUP hod; HOFFPAGE hop; VRFY_CHILDINFO child; VRFY_PAGEINFO *pip; db_indx_t offset, len, dlen, elen; int ret, t_ret; + off_t blob_size; + db_seq_t blob_id, file_id, sdb_id; u_int8_t *databuf; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) @@ -287,6 +341,38 @@ __ham_vrfy_item(dbp, vdp, pgno, h, i, flags) case H_KEYDATA: /* Nothing to do here--everything but the type field is data */ break; + case H_BLOB: + /* + * Blob item. Check that the blob file exists and is the same + * file size as is stored in the database record. 
+ */ + memcpy(&hblob, P_ENTRY(dbp, h, i), HBLOB_SIZE); + blob_id = (db_seq_t)hblob.id; + GET_BLOB_SIZE(dbp->env, hblob, blob_size, ret); + if (ret != 0 || blob_size < 0) { + EPRINT((dbp->env, DB_STR_A("1181", + "Page %lu: blob file size value has overflowed", + "%lu"), (u_long)pip->pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + file_id = (db_seq_t)hblob.file_id; + sdb_id = (db_seq_t)hblob.sdb_id; + if (file_id == 0 && sdb_id == 0) { + EPRINT((dbp->env, DB_STR_A("1184", + "Page %lu: invalid blob dir ids %llu %llu at item %lu", + "%lu %llu %llu %lu"), + (u_long)pip->pgno, (unsigned long long)file_id, + (unsigned long long)sdb_id, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + if ((ret = __blob_vrfy(dbp->env, blob_id, + blob_size, file_id, sdb_id, pip->pgno, flags)) != 0) { + ret = DB_VERIFY_BAD; + goto err; + } + break; case H_DUPLICATE: /* Are we a datum or a key? Better be the former. */ if (i % 2 == 0) { @@ -822,15 +908,23 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) u_int32_t flags; { DBT dbt, key_dbt, unkdbt; + ENV *env; + HBLOB hblob; + char *prefix; db_pgno_t dpgno; int ret, err_ret, t_ret; - u_int32_t himark, i, ovfl_bufsz; - u_int8_t *hk, *p; + off_t blob_size, blob_offset, remaining; + u_int32_t blob_buf_size, himark, i, ovfl_bufsz; + u_int8_t *blob_buf, *hk, *p; + db_seq_t blob_id, file_id, sdb_id; void *buf, *key_buf; db_indx_t dlen, len, tlen; memset(&dbt, 0, sizeof(DBT)); dbt.flags = DB_DBT_REALLOC; + blob_buf = NULL; + blob_buf_size = 0; + env = dbp->env; DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); @@ -840,9 +934,9 @@ __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) * Allocate a buffer for overflow items. Start at one page; * __db_safe_goff will realloc as needed. 
*/ - if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0) + if ((ret = __os_malloc(env, dbp->pgsize, &buf)) != 0) return (ret); - ovfl_bufsz = dbp->pgsize; + ovfl_bufsz = dbp->pgsize; himark = dbp->pgsize; for (i = 0;; i++) { @@ -886,6 +980,70 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); 0, " ", handle, callback, 0, 0, vdp)) != 0) err_ret = ret; break; + case H_BLOB: + memcpy(&hblob, hk, HBLOB_SIZE); + blob_id = (db_seq_t)hblob.id; + GET_BLOB_SIZE(env, hblob, blob_size, ret); + if (ret != 0 || blob_size < 0) { + err_ret = DB_VERIFY_BAD; + continue; + } + file_id = (db_seq_t)hblob.file_id; + sdb_id = (db_seq_t)hblob.sdb_id; + /* Read the blob, in pieces if too large.*/ + blob_offset = 0; + if (blob_size > MEGABYTE) { + if (blob_buf_size < MEGABYTE) { + if ((ret = __os_realloc( + env, MEGABYTE, + &blob_buf)) != 0) { + err_ret = ret; + continue; + } + blob_buf_size = MEGABYTE; + } + } else if (blob_buf_size < blob_size) { + blob_buf_size = (u_int32_t)blob_size; + if ((ret = __os_realloc(env, + blob_buf_size, &blob_buf)) != 0) { + err_ret = ret; + continue; + } + } + dbt.data = blob_buf; + dbt.ulen = blob_buf_size; + remaining = blob_size; + prefix = " "; + do { + if ((ret = __blob_salvage(env, blob_id, + blob_offset, + (remaining < blob_buf_size ? 
+ (size_t)remaining : blob_buf_size), + file_id, sdb_id, &dbt)) != 0) { + err_ret = DB_VERIFY_BAD; + break; + } + if (remaining > blob_buf_size) + F_SET( + vdp, SALVAGE_STREAM_BLOB); + else + F_CLR( + vdp, SALVAGE_STREAM_BLOB); + if ((ret = __db_vrfy_prdbt( + &dbt, 0, prefix, handle, + callback, 0, 0, vdp)) != 0) { + err_ret = ret; + break; + } + prefix = NULL; + blob_offset += dbt.size; + if (remaining < blob_buf_size) + remaining = 0; + else + remaining -= blob_buf_size; + } while (remaining > 0); + F_CLR(vdp, SALVAGE_STREAM_BLOB); + break; case H_OFFPAGE: if (len < HOFFPAGE_SIZE) { err_ret = DB_VERIFY_BAD; @@ -960,7 +1118,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); */ memset(&key_dbt, 0, sizeof(key_dbt)); if ((ret = __os_malloc( - dbp->env, dbt.size, &key_buf)) != 0) + env, dbt.size, &key_buf)) != 0) return (ret); memcpy(key_buf, buf, dbt.size); key_dbt.data = key_buf; @@ -1002,7 +1160,7 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); handle, callback, 0, 0, vdp)) != 0) err_ret = ret; } - __os_free(dbp->env, key_buf); + __os_free(env, key_buf); break; default: if (!LF_ISSET(DB_AGGRESSIVE)) @@ -1013,7 +1171,9 @@ keydata: memcpy(buf, HKEYDATA_DATA(hk), len); } } - __os_free(dbp->env, buf); + if (blob_buf != NULL) + __os_free(env, blob_buf); + __os_free(env, buf); if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) return (t_ret); return ((ret == 0 && err_ret != 0) ? err_ret : ret); @@ -1129,7 +1289,7 @@ __ham_dups_unsorted(dbp, buf, len) { DBT a, b; db_indx_t offset, dlen; - int (*func) __P((DB *, const DBT *, const DBT *)); + int (*func) __P((DB *, const DBT *, const DBT *, size_t *)); memset(&a, 0, sizeof(DBT)); memset(&b, 0, sizeof(DBT)); @@ -1146,7 +1306,7 @@ __ham_dups_unsorted(dbp, buf, len) b.data = buf + offset + sizeof(db_indx_t); b.size = dlen; - if (a.data != NULL && func(dbp, &a, &b) > 0) + if (a.data != NULL && func(dbp, &a, &b, NULL) > 0) return (1); a.data = b.data; |