diff options
Diffstat (limited to 'src/heap')
-rw-r--r-- | src/heap/heap.c | 592 | ||||
-rw-r--r-- | src/heap/heap.src | 26 | ||||
-rw-r--r-- | src/heap/heap_auto.c | 28 | ||||
-rw-r--r-- | src/heap/heap_autop.c | 34 | ||||
-rw-r--r-- | src/heap/heap_backup.c | 2 | ||||
-rw-r--r-- | src/heap/heap_conv.c | 7 | ||||
-rw-r--r-- | src/heap/heap_method.c | 14 | ||||
-rw-r--r-- | src/heap/heap_open.c | 41 | ||||
-rw-r--r-- | src/heap/heap_rec.c | 216 | ||||
-rw-r--r-- | src/heap/heap_reclaim.c | 4 | ||||
-rw-r--r-- | src/heap/heap_stat.c | 11 | ||||
-rw-r--r-- | src/heap/heap_stub.c | 36 | ||||
-rw-r--r-- | src/heap/heap_upgrade.c | 106 | ||||
-rw-r--r-- | src/heap/heap_verify.c | 169 |
14 files changed, 1000 insertions, 286 deletions
diff --git a/src/heap/heap.c b/src/heap/heap.c index ab404658..7aec416b 100644 --- a/src/heap/heap.c +++ b/src/heap/heap.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -24,6 +24,8 @@ static int __heapc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __heapc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __heapc_reloc __P((DBC *, DBT *, DBT *)); static int __heapc_reloc_partial __P((DBC *, DBT *, DBT *)); +static void __heapc_search __P((DBC *, HEAPPG *, db_indx_t, + int, db_indx_t *, int *)); static int __heapc_split __P((DBC *, DBT *, DBT *, int)); /* @@ -134,12 +136,15 @@ __heap_bulk(dbc, data, flags) DB_HEAP_RID prev_rid, rid; DBT sdata; HEAP_CURSOR *cp; + HEAPBLOBHDR bhdr; HEAPHDR *hdr; HEAPSPLITHDR *shdr; PAGE *pg; db_lockmode_t lock_type; int is_key, ret; int32_t *offp; + off_t blob_size; + db_seq_t blob_id; u_int32_t data_size, key_size, needed, space; u_int8_t *dbuf, *np; @@ -183,6 +188,7 @@ __heap_bulk(dbc, data, flags) next_pg: rid.indx = cp->indx; rid.pgno = cp->pgno; + prev_rid = rid; pg = cp->page; /* @@ -213,6 +219,14 @@ next_pg: if (F_ISSET(hdr, HEAP_RECSPLIT)) { shdr = (HEAPSPLITHDR *)hdr; data_size = DB_ALIGN(shdr->tsize, sizeof(u_int32_t)); + } else if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE); + GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret); + if (ret != 0) + return (ret); + if (blob_size > UINT32_MAX) + return (DB_BUFFER_SMALL); + data_size = (u_int32_t)blob_size; } else data_size = DB_ALIGN(hdr->size, sizeof(u_int32_t)); needed += 2 * sizeof(*offp) + data_size; @@ -250,13 +264,21 @@ next_pg: if ((ret = __heapc_gsplit( dbc, &sdata, NULL, NULL)) != 0) return (ret); - } else { + } else if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE); + blob_id = (db_seq_t)bhdr.id; + if ((ret = __blob_bulk( + dbc, data_size, blob_id, np)) != 0) + return (ret); + }else { memcpy(np, (u_int8_t *)hdr + sizeof(HEAPHDR), hdr->size); } *offp-- = (int32_t)(np - dbuf); if (F_ISSET(hdr, HEAP_RECSPLIT)) *offp-- = (int32_t)shdr->tsize; + else if (F_ISSET(hdr, HEAP_RECBLOB)) + *offp-- = (int32_t)data_size; else *offp-- = (int32_t)hdr->size; np += data_size; @@ -296,7 +318,6 @@ __heapc_close(dbc, root_pgno, rmroot) db_pgno_t root_pgno; int *rmroot; { - DB_MPOOLFILE *mpf; HEAP_CURSOR *cp; int ret; @@ -304,7 +325,6 @@ __heapc_close(dbc, root_pgno, rmroot) COMPQUIET(rmroot, 0); cp = (HEAP_CURSOR *)dbc->internal; - mpf = dbc->dbp->mpf; ret = 0; /* Release the page/lock held by the cursor. */ @@ -325,11 +345,14 @@ __heapc_del(dbc, flags) DB_MPOOLFILE *mpf; DBT hdr_dbt, log_dbt; HEAP *h; + HEAPBLOBHDR bhdr; HEAPHDR *hdr; HEAPPG *rpage; HEAP_CURSOR *cp; db_pgno_t region_pgno; - int oldspacebits, ret, spacebits, t_ret; + int ret, t_ret; + db_seq_t blob_id; + u_int32_t oldspacebits, spacebits; u_int16_t data_size, size; dbp = dbc->dbp; @@ -337,6 +360,7 @@ __heapc_del(dbc, flags) h = dbp->heap_internal; cp = (HEAP_CURSOR *)dbc->internal; rpage = NULL; + ret = 0; COMPQUIET(flags, 0); /* @@ -377,6 +401,14 @@ start: if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, next_rid.indx = 0; } + /* Delete the blob file. */ + if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE); + blob_id = (db_seq_t)bhdr.id; + if ((ret = __blob_del(dbc, blob_id)) != 0) + return (ret); + } + /* Log the deletion. */ if (DBC_LOGGING(dbc)) { hdr_dbt.data = hdr; @@ -384,8 +416,9 @@ start: if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, log_dbt.data = (u_int8_t *)hdr + hdr_dbt.size; log_dbt.size = data_size; if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), - 0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx, - size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) + 0, OP_SET(DB_REM_HEAP, cp->page), + cp->pgno, (u_int32_t)cp->indx, size, + &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; } else LSN_NOT_LOGGED(LSN(cp->page)); @@ -414,7 +447,7 @@ start: if (STD_LOCKING(dbc) && (ret = __db_lget(dbc, dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) goto err; HEAP_SETSPACE(dbp, rpage, - cp->pgno - region_pgno - 1, spacebits); + (cp->pgno - region_pgno) - 1, spacebits); } err: DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND); @@ -443,7 +476,8 @@ err: DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND); /* * __heap_ditem -- - * Remove an item from a page. + * Remove an item from a page. Note when deleting blob records that the file + * has to be deleted separate from calling this function. * * PUBLIC: int __heap_ditem * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t)); @@ -537,19 +571,21 @@ __heapc_get(dbc, key, data, flags, pgnop) DB_MPOOLFILE *mpf; DB_LOCK meta_lock; DBT tmp_val; - HEAP *h; + HEAPBLOBHDR bhdr; HEAPHDR *hdr; HEAPMETA *meta; HEAPPG *dpage; HEAP_CURSOR *cp; db_lockmode_t lock_type; db_pgno_t pgno; - int cmp, f_indx, found, getpage, indx, ret; + int cmp, np_inc, f_indx, found, getpage, indx, ret; + off_t blob_size; + db_seq_t blob_id; dbp = dbc->dbp; mpf = dbp->mpf; - h = dbp->heap_internal; cp = (HEAP_CURSOR *)dbc->internal; + pgno = PGNO_INVALID; LOCK_INIT(meta_lock); COMPQUIET(pgnop, NULL); @@ -564,7 +600,7 @@ __heapc_get(dbc, key, data, flags, pgnop) else lock_type = DB_LOCK_READ; - ret = 0; + np_inc = ret = 0; found = getpage = FALSE; meta = NULL; dpage = NULL; @@ -579,7 +615,7 @@ __heapc_get(dbc, key, data, flags, pgnop) ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret); if (ret != 0) { if (ret == DB_PAGE_NOTFOUND) - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -591,7 +627,7 @@ __heapc_get(dbc, key, data, flags, pgnop) hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, cp->indx); if (F_ISSET(hdr, HEAP_RECSPLIT) && !F_ISSET(hdr, HEAP_RECFIRST)) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -610,7 +646,7 @@ first: pgno = FIRST_HEAP_DPAGE; ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret); if (ret != 0 ) { if (ret == DB_PAGE_NOTFOUND) - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } dpage = (HEAPPG *)cp->page; @@ -620,25 +656,10 @@ first: pgno = FIRST_HEAP_DPAGE; * finding first non-split record or first piece of a * split record, then set up cursor. */ - if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) { - for (indx = 0; - indx <= HEAP_HIGHINDX(dpage); indx++) { - if (HEAP_OFFSETTBL( - dbp, dpage)[indx] == 0) - continue; - hdr = (HEAPHDR *)P_ENTRY( - dbp, dpage, indx); - if (!F_ISSET(hdr, HEAP_RECSPLIT) || - F_ISSET(hdr, HEAP_RECFIRST)) { - found = TRUE; - cp->pgno = pgno; - cp->indx = indx; - break; - } - } - if (!found) - pgno++; - } else + __heapc_search(dbc, dpage, 0, 1, &cp->indx, &found); + if (found) + cp->pgno = pgno; + else pgno++; } break; @@ -668,7 +689,7 @@ last: pgno = PGNO_BASE_MD; while (!found) { /* Don't look earlier than the first data page. */ if (pgno < FIRST_HEAP_DPAGE) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -683,33 +704,33 @@ last: pgno = PGNO_BASE_MD; * non-split record or the first piece of a split record * is found. */ - if (TYPE(dpage) == P_HEAP && NUM_ENT(dpage) != 0) { - for (indx = HEAP_HIGHINDX(dpage); - indx >= 0; indx--) { - if (HEAP_OFFSETTBL( - dbp, dpage)[indx] == 0) - continue; - hdr = (HEAPHDR *)P_ENTRY( - dbp, dpage, indx); - if (!F_ISSET(hdr, HEAP_RECSPLIT) || - F_ISSET(hdr, HEAP_RECFIRST)) { - found = TRUE; - cp->pgno = pgno; - cp->indx = indx; - break; - } - } - if (!found) - pgno--; - } else + __heapc_search(dbc, + dpage, HEAP_HIGHINDX(dpage), 1, &cp->indx, &found); + if (found) + cp->pgno = pgno; + else pgno--; } break; case DB_NEXT_NODUP: case DB_NEXT: - /* If cursor not initialize, behave as DB_FIRST */ - if (dbc->internal->pgno == PGNO_INVALID) - goto first; + case DB_PREV_NODUP: + case DB_PREV: + /* + * np_inc stores whether to increment or decrement when + * iterating through records on a page and pages in the file. + */ + if (flags == DB_NEXT_NODUP || flags == DB_NEXT) + np_inc = 1; + else + np_inc = -1; + /* If cursor not initialized, behave as DB_FIRST/DB_LAST */ + if (dbc->internal->pgno == PGNO_INVALID) { + if (np_inc == 1) + goto first; + else + goto last; + } /* * Acquire the current page with the lock we have already, @@ -720,108 +741,49 @@ last: pgno = PGNO_BASE_MD; goto err; dpage = (HEAPPG *)cp->page; - /* At end of current page, must get next page */ - if (cp->indx >= HEAP_HIGHINDX(dpage)) + if (np_inc == 1 && cp->indx >= HEAP_HIGHINDX(dpage)) + /* At end of current page, must get next page. */ getpage = TRUE; - - while (!found) { - if (getpage) { - pgno = cp->pgno + 1; - - /* Put current page/lock and get next one */ - ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret); - if (ret != 0) { - /* Beyond last page? */ - if (ret == DB_PAGE_NOTFOUND) - ret = DB_NOTFOUND; - goto err; - } - dpage = (HEAPPG *)cp->page; - - /* - * If page is a spam page or its a data - * page without entries, try again. - */ - if (TYPE(dpage) != P_HEAP || - (TYPE(dpage) == P_HEAP && - NUM_ENT(dpage) == 0)) - continue; - - /* When searching, indx gets bumped to 0 */ - cp->indx = -1; - getpage = FALSE; - } - + else if (np_inc == -1) { /* - * Bump index and loop through the offset table finding - * first nonzero entry. If the offset is for a split - * record, make sure it's the first piece of the split - * record. HEAP_HIGHINDX always points to highest filled - * entry on page. + * Loop through indexes and find first used slot. Check + * if already at the first slot. */ - cp->indx++; - for (indx=cp->indx; - indx <= HEAP_HIGHINDX(dpage); indx++) { - if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0) - continue; - hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx); - if (!F_ISSET(hdr, HEAP_RECSPLIT) || - F_ISSET(hdr, HEAP_RECFIRST)) { - found = TRUE; - cp->indx = indx; - break; - } + for (f_indx=0; (f_indx <= HEAP_HIGHINDX(dpage)) && + (HEAP_OFFSETTBL(dbp, dpage)[f_indx] == 0); f_indx++) + { + /* No-op. */ } - /* Nothing of interest on page, so try next */ - if (!found) + /* At the beginning of current page, get new page */ + if (cp->indx == 0 || cp->indx <= f_indx) { + if (cp->pgno == FIRST_HEAP_DPAGE) { + ret = DBC_ERR(dbc, DB_NOTFOUND); + goto err; + } getpage = TRUE; - } - break; - case DB_PREV_NODUP: - case DB_PREV: - /* If cursor not initialize, behave as DB_LAST */ - if (dbc->internal->pgno == PGNO_INVALID) - goto last; - - /* - * Acquire the current page with the lock we have already, - * unless user has asked for a write lock. - */ - ACQUIRE_CUR(dbc, lock_type, cp->pgno, 0, 0, ret); - if (ret != 0) - goto err; - dpage = (HEAPPG *)cp->page; - - /* - * Loop through indexes and find first used slot. Check if - * already at the first slot. - */ - for (f_indx=0; (f_indx <= HEAP_HIGHINDX(dpage)) && - (HEAP_OFFSETTBL(dbp, dpage)[f_indx] == 0); f_indx++) ; - - /* At the beginning of current page, must get new page */ - if (cp->indx == 0 || cp->indx <= f_indx) { - if (cp->pgno == FIRST_HEAP_DPAGE) { - ret = DB_NOTFOUND; - goto err; } - getpage = TRUE; } while (!found) { if (getpage) { - pgno = cp->pgno - 1; - /* Do not go past first page */ + if (np_inc == -1) + pgno = cp->pgno - 1; + else if (np_inc == 1) + pgno = cp->pgno + 1; if (pgno < FIRST_HEAP_DPAGE) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } - /* Put current page/lock and get prev page. */ + /* Put current page/lock and get next one */ ACQUIRE_CUR(dbc, lock_type, pgno, 0, 0, ret); - if (ret != 0) + if (ret != 0) { + if (np_inc == 1 && + ret == DB_PAGE_NOTFOUND) + /* Beyond last page */ + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; - + } dpage = (HEAPPG *)cp->page; /* @@ -833,31 +795,36 @@ last: pgno = PGNO_BASE_MD; NUM_ENT(dpage) == 0)) continue; - /* When search, this gets bumped to high indx */ - cp->indx = HEAP_HIGHINDX(dpage) + 1; + if (np_inc == 1) + /* + * When searching, indx gets + * bumped to 0 + */ + cp->indx = UINT16_MAX; + else + /* + * When searching, indx gets bumped to + * high indx + */ + cp->indx = HEAP_HIGHINDX(dpage) + 1; getpage = FALSE; } /* - * Decrement index and loop through the offset table - * finding previous nonzero entry. + * Bump index and loop through the offset table finding + * first nonzero entry. If the offset is for a split + * record, make sure it's the first piece of the split + * record. HEAP_HIGHINDX always points to highest filled + * entry on page. */ - cp->indx--; - for (indx=cp->indx; - indx >= 0; indx--) { - if (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0) - continue; - hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx); - if (!F_ISSET(hdr, HEAP_RECSPLIT) || - F_ISSET(hdr, HEAP_RECFIRST)) { - found = TRUE; - cp->indx = indx; - break; - } - } - - /* Nothing of interest on page, so try previous */ + if (np_inc == -1) + cp->indx--; + else if (np_inc == 1) + cp->indx++; + __heapc_search(dbc, + dpage, cp->indx, np_inc, &cp->indx, &found); if (!found) + /* Nothing of interest on page, so try next */ getpage = TRUE; } break; @@ -871,7 +838,7 @@ last: pgno = PGNO_BASE_MD; /* First make sure we're trying to get a data page. */ if (pgno == PGNO_BASE_MD || pgno == HEAP_REGION_PGNO(dbp, pgno)) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -880,7 +847,7 @@ last: pgno = PGNO_BASE_MD; if (ret != 0) { if (ret == DB_PAGE_NOTFOUND) - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } dpage = (HEAPPG *)cp->page; @@ -889,14 +856,14 @@ last: pgno = PGNO_BASE_MD; if ((indx > HEAP_HIGHINDX(dpage)) || (HEAP_OFFSETTBL(dbp, dpage)[indx] == 0)) { DISCARD(dbc, cp->page, cp->lock, 0, ret); - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx); if (F_ISSET(hdr, HEAP_RECSPLIT) && !F_ISSET(hdr, HEAP_RECFIRST)) { DISCARD(dbc, cp->page, cp->lock, 0, ret); - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -911,16 +878,30 @@ last: pgno = PGNO_BASE_MD; if ((ret = __heapc_gsplit( dbc, &tmp_val, NULL, 0)) != 0) goto err; + } else if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE); + blob_id = (db_seq_t)bhdr.id; + GET_BLOB_SIZE(dbc->env, bhdr, blob_size, ret); + if (ret != 0) + goto err; + if (blob_size > UINT32_MAX) { + ret = DB_BUFFER_SMALL; + goto err; + } + tmp_val.flags = DB_DBT_MALLOC; + if ((ret = __blob_get(dbc, &tmp_val, + blob_id, blob_size, NULL, 0)) != 0) + goto err; } else { tmp_val.data = (void *)((u_int8_t *)hdr + sizeof(HEAPHDR)); tmp_val.size = hdr->size; } - cmp = __bam_defcmp(dbp, &tmp_val, data); + cmp = __bam_defcmp(dbp, &tmp_val, data, NULL); if (F_ISSET(&tmp_val, DB_DBT_MALLOC)) __os_ufree(dbp->env, tmp_val.data); if (cmp != 0) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } } @@ -928,7 +909,7 @@ last: pgno = PGNO_BASE_MD; break; case DB_NEXT_DUP: case DB_PREV_DUP: - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; default: /* DB_GET_RECNO, DB_JOIN_ITEM, DB_SET_RECNO are invalid */ @@ -959,6 +940,53 @@ err: if (ret == 0 ) { return (ret); } +/* + * __heapc_search -- + * Search a given a heap page, starting at a given index, for a viable heap + * record. Return the index of the found record in indxp. + */ +static void +__heapc_search(dbc, dpage, begin, dir, indxp, found) + DBC *dbc; + HEAPPG *dpage; + db_indx_t begin; + int dir; + db_indx_t *indxp; + int *found; +{ + DB *dbp; + HEAPHDR *hdr; + db_indx_t indx; + + dbp = dbc->dbp; + DB_ASSERT(dbp->env, dir == -1 || dir == 1); + + *found = FALSE; + if (TYPE(dpage) != P_HEAP || NUM_ENT(dpage) == 0) + return; + + indx = begin; + for (;;) { + if (HEAP_OFFSETTBL(dbp, dpage)[indx] != 0) { + hdr = (HEAPHDR *)P_ENTRY(dbp, dpage, indx); + if (!F_ISSET(hdr, HEAP_RECSPLIT) || + F_ISSET(hdr, HEAP_RECFIRST)) { + *found = TRUE; + *indxp = indx; + break; + } + } + if ((dir == -1 && indx == 0) || + (dir == 1 && indx == HEAP_HIGHINDX(dpage))) + break; + + if (dir == -1) + indx--; + else + indx++; + } +} + #undef IS_FIRST #define IS_FIRST (last_rid.pgno == PGNO_INVALID) /* @@ -993,6 +1021,7 @@ __heapc_reloc_partial(dbc, key, data) /* We only work on partial puts. */ DB_ASSERT(dbp->env, F_ISSET(data, DB_DBT_PARTIAL)); + DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB)); /* * Start by calculating the data_size, total size of the new record, and @@ -1014,7 +1043,7 @@ __heapc_reloc_partial(dbc, key, data) dlen = old_size - doff; else dlen = data->dlen; - data_size = old_size - dlen + data->size; + data_size = (old_size - dlen) + data->size; } /* @@ -1075,8 +1104,8 @@ __heapc_reloc_partial(dbc, key, data) */ data_size = doff + (add_bytes ? data->size : 0); else - data_size = old_hdr->size - - dlen + (add_bytes ? data->size : 0); + data_size = (old_hdr->size - + dlen) + (add_bytes ? data->size : 0); data_size += remaining; if (data_size > buflen) { @@ -1120,7 +1149,7 @@ __heapc_reloc_partial(dbc, key, data) if (doff + dlen < old_hdr->size) { olddata += dlen; memcpy(buf, - olddata, old_hdr->size - doff - dlen); + olddata, (old_hdr->size - doff) - dlen); dlen = 0; } else /* @@ -1145,8 +1174,8 @@ __heapc_reloc_partial(dbc, key, data) log_dbt.size = DB_ALIGN( old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, - (u_int32_t)cp->indx, old_size, + &LSN(cp->page), 0, OP_SET(DB_REM_HEAP, cp->page), + cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; } else @@ -1185,7 +1214,8 @@ __heapc_reloc_partial(dbc, key, data) log_dbt.size = DB_ALIGN( old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_REM_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1197,7 +1227,8 @@ __heapc_reloc_partial(dbc, key, data) if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1231,7 +1262,7 @@ __heapc_reloc_partial(dbc, key, data) size -= sizeof(db_indx_t); /* Round down to a multiple of 4. */ size = DB_ALIGN( - size - sizeof(u_int32_t) + 1, sizeof(u_int32_t)); + (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t)); DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR)); /* @@ -1261,7 +1292,8 @@ __heapc_reloc_partial(dbc, key, data) if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), 0, - DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx, + OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, + (u_int32_t)cp->indx, size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0) goto err; } else @@ -1343,7 +1375,8 @@ next_pg: last_rid.pgno = cp->pgno; log_dbt.size = DB_ALIGN( old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_REM_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1355,7 +1388,8 @@ next_pg: last_rid.pgno = cp->pgno; if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1397,6 +1431,8 @@ __heapc_reloc(dbc, key, data) memset(&hdr_dbt, 0, sizeof(DBT)); memset(&log_dbt, 0, sizeof(DBT)); COMPQUIET(key, NULL); + /* Blob database records never change size. */ + DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB)); /* * We are updating an existing record, which will grow into a split @@ -1436,7 +1472,8 @@ __heapc_reloc(dbc, key, data) log_dbt.size = DB_ALIGN( old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_REM_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1471,7 +1508,7 @@ __heapc_reloc(dbc, key, data) size -= sizeof(db_indx_t); /* Round down to a multiple of 4. */ size = DB_ALIGN( - size - sizeof(u_int32_t) + 1, sizeof(u_int32_t)); + (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t)); DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR)); new_hdr.std_hdr.size = (u_int16_t)(size - sizeof(HEAPSPLITHDR)); @@ -1495,7 +1532,8 @@ __heapc_reloc(dbc, key, data) if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), 0, - DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx, + OP_SET(DB_ADD_HEAP, cp->page), + cp->pgno, (u_int32_t)cp->indx, size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0) goto err; } else @@ -1565,7 +1603,8 @@ next_pg: if (next_rid.pgno != PGNO_INVALID) { log_dbt.size = DB_ALIGN( old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_REM_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_REM_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1577,7 +1616,8 @@ next_pg: if (next_rid.pgno != PGNO_INVALID) { if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, - &LSN(cp->page), 0, DB_ADD_HEAP, cp->pgno, + &LSN(cp->page), 0, + OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, (u_int32_t)cp->indx,old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; @@ -1608,20 +1648,26 @@ __heapc_put(dbc, key, data, flags, pgnop) DB *dbp; DBT hdr_dbt, log_dbt, new_data; DB_MPOOLFILE *mpf; + HEAPBLOBHDR bhdr; HEAPHDR hdr, *old_hdr; HEAP_CURSOR *cp; PAGE *rpage; db_pgno_t region_pgno; - int oldspace, ret, space, t_ret; - u_int32_t data_size, dlen, new_size, old_flags, old_size, tot_size; - u_int8_t *buf, *olddata, *src, *dest; + int buf_alloc, ret, t_ret; + off_t blob_size; + db_seq_t blob_id, new_blob_id; + u_int32_t data_size, dlen, new_size, old_flags, old_size; + u_int32_t oldspace, space, tot_size; + u_int8_t *buf, *olddata; dbp = dbc->dbp; mpf = dbp->mpf; cp = (HEAP_CURSOR *)dbc->internal; rpage = NULL; - buf = dest = src = NULL; + buf = NULL; + buf_alloc = 0; dlen = 0; + blob_id = new_blob_id = 0; if (flags != DB_CURRENT) { /* We're going to write following the get, so use RMW. */ @@ -1668,7 +1714,8 @@ __heapc_put(dbc, key, data, flags, pgnop) DB_ALIGN(old_hdr->size + HEAP_HDRSIZE(old_hdr), sizeof(u_int32_t)); if (old_size < sizeof(HEAPSPLITHDR)) old_size = sizeof(HEAPSPLITHDR); - if (F_ISSET(data, DB_DBT_PARTIAL)) { + /* Partial puts on blobs are dealt with in the blob code. */ + if (F_ISSET(data, DB_DBT_PARTIAL) && !F_ISSET(old_hdr, HEAP_RECBLOB)) { if (F_ISSET(old_hdr, HEAP_RECSPLIT)) tot_size = ((HEAPSPLITHDR *)old_hdr)->tsize; else @@ -1682,9 +1729,11 @@ __heapc_put(dbc, key, data, flags, pgnop) dlen = tot_size - data->doff; else dlen = data->dlen; - data_size = tot_size - dlen + data->size; + data_size = (tot_size - dlen) + data->size; } - } else + } else if F_ISSET(old_hdr, HEAP_RECBLOB) + data_size = HEAPBLOBREC_DSIZE; + else data_size = data->size; new_size = DB_ALIGN(data_size + sizeof(HEAPHDR), sizeof(u_int32_t)); if (new_size < sizeof(HEAPSPLITHDR)) @@ -1694,6 +1743,8 @@ __heapc_put(dbc, key, data, flags, pgnop) if (F_ISSET(old_hdr, HEAP_RECSPLIT) || (new_size > old_size && new_size - old_size > HEAP_FREESPACE(dbp, cp->page))) { + /* Blob database records never change size. */ + DB_ASSERT(dbp->env, !F_ISSET(old_hdr, HEAP_RECBLOB)); /* * We've got to split the record, not enough room on the * page. Splitting the record will remove old_size bytes and @@ -1707,13 +1758,14 @@ __heapc_put(dbc, key, data, flags, pgnop) memset(&new_data, 0, sizeof(DBT)); new_data.size = data_size; - if (F_ISSET(data, DB_DBT_PARTIAL)) { + if (F_ISSET(data, DB_DBT_PARTIAL) && !F_ISSET(old_hdr, HEAP_RECBLOB)) { /* * Before replacing the old data, we need to use it to build the * new data. */ if ((ret = __os_malloc(dbp->env, data_size, &buf)) != 0) goto err; + buf_alloc = 1; new_data.data = buf; /* @@ -1736,10 +1788,32 @@ __heapc_put(dbc, key, data, flags, pgnop) buf += data->size; /* Fill in remaining data from the old record, skipping dlen. */ - if (data->doff < old_hdr->size) { + if ((data->doff + data->dlen) < old_hdr->size) { olddata += data->doff + data->dlen; - memcpy(buf, - olddata, old_hdr->size - data->doff - data->dlen); + memcpy(buf, olddata, + (old_hdr->size - data->doff) - data->dlen); + } + } else if (F_ISSET(old_hdr, HEAP_RECBLOB)) { + data_size = HEAPBLOBREC_DSIZE; + new_data.size = HEAPBLOBREC_DSIZE; + if (F_ISSET(data, DB_DBT_BLOB_REC)) { + DB_ASSERT(dbp->env, + F_ISSET(((HEAPHDR *)data->data), HEAP_RECBLOB)); + new_data.data = HEAPBLOBREC_DATA(data->data); + } else { + memcpy(&bhdr, old_hdr, HEAPBLOBREC_SIZE); + blob_id = (db_seq_t)bhdr.id; + GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret); + if (ret != 0) + goto err; + if ((ret = __blob_repl(dbc, + data, blob_id, &new_blob_id, &blob_size)) != 0) + goto err; + bhdr.std_hdr.flags = HEAP_RECBLOB; + bhdr.std_hdr.size = HEAPBLOBREC_DSIZE; + SET_BLOB_SIZE(&bhdr, blob_size, HEAPBLOBHDR); + SET_BLOB_ID(&bhdr, new_blob_id, HEAPBLOBHDR); + new_data.data = HEAPBLOBREC_DATA(&bhdr); } } else { new_data.data = data->data; @@ -1751,19 +1825,23 @@ __heapc_put(dbc, key, data, flags, pgnop) */ memset(&hdr, 0, sizeof(HEAPHDR)); hdr.size = data_size; + if (F_ISSET(old_hdr, HEAP_RECBLOB)) + hdr.flags = HEAP_RECBLOB; if (DBC_LOGGING(dbc)) { hdr_dbt.data = old_hdr; hdr_dbt.size = HEAP_HDRSIZE(old_hdr); log_dbt.data = (u_int8_t *)old_hdr + hdr_dbt.size; log_dbt.size = DB_ALIGN(old_hdr->size, sizeof(u_int32_t)); if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), - 0, DB_REM_HEAP, cp->pgno, (u_int32_t)cp->indx, + 0, OP_SET(DB_REM_HEAP, cp->page), cp->pgno, + (u_int32_t)cp->indx, old_size, &hdr_dbt, &log_dbt, &LSN(cp->page))) != 0) goto err; hdr_dbt.data = &hdr; hdr_dbt.size = HEAP_HDRSIZE(&hdr); if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), - 0, DB_ADD_HEAP, cp->pgno, (u_int32_t)cp->indx, + 0, OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, + (u_int32_t)cp->indx, new_size, &hdr_dbt, &new_data, &LSN(cp->page))) != 0) goto err; } else @@ -1788,14 +1866,14 @@ __heapc_put(dbc, key, data, flags, pgnop) dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) goto err; - HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space); + HEAP_SETSPACE(dbp, rpage, (cp->pgno - region_pgno) - 1, space); } err: DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND); if (rpage != NULL && (t_ret = __memp_fput(mpf, dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) ret = t_ret; - if (F_ISSET(data, DB_DBT_PARTIAL)) + if (buf_alloc) __os_free(dbp->env, new_data.data); if (ret != 0 && LOCK_ISSET(cp->lock)) @@ -1823,18 +1901,21 @@ __heap_getpage(dbc, size, avail) HEAP *h; HEAPPG *rpage; HEAP_CURSOR *cp; - db_pgno_t data_pgno, *lkd_pgs, meta_pgno, region_pgno, start_region; - int i, lk_mode, max, p, ret, space, start, t_ret; + db_pgno_t data_pgno, i, max, meta_pgno, p, region_pgno, start; + db_pgno_t start_region; + int ret, t_ret; + u_int32_t lk_mode, space; LOCK_INIT(meta_lock); + data_pgno = PGNO_INVALID; dbp = dbc->dbp; mpf = dbp->mpf; cp = (HEAP_CURSOR *)dbc->internal; h = dbp->heap_internal; start_region = region_pgno = h->curregion; max = HEAP_REGION_SIZE(dbp); - i = ret = t_ret = 0; - lkd_pgs = NULL; + i = 0; + ret = t_ret = 0; /* * The algorithm for finding a page: @@ -1897,10 +1978,10 @@ find: while ((ret = __memp_fget(mpf, ®ion_pgno, max = h->maxpgno - region_pgno; /* * Look in the bitmap for a page with sufficient free space. We use i - * in a slightly strange way. Because the 2-bits in the bitmap are only - * an estimate, there is a chance the data won't fit on the page we - * choose. In that case, we re-start the process and want to be able to - * resume this loop where we left off. + * in a slightly strange way. Because the 2-bits in the bitmap are + * only an estimate, there is a chance the data won't fit on the page + * we choose. In that case, we re-start the process and want to be + * able to resume this loop where we left off. */ for (; i < max; i++) { p = start + i; @@ -1908,7 +1989,7 @@ find: while ((ret = __memp_fget(mpf, ®ion_pgno, p -= max; if ((*avail = HEAP_SPACE(dbp, rpage, p)) > space) continue; - data_pgno = region_pgno + p + 1; + data_pgno = (region_pgno + p) + 1; ACQUIRE_CUR(dbc, DB_LOCK_WRITE, data_pgno, DB_LOCK_NOWAIT, 0, ret); /* @@ -2071,7 +2152,7 @@ pg_err: if (p != 0) { if (ret == DB_LOCK_NOTGRANTED) ret = 0; else if (ret != 0) { - /* + /* * Free up the metadata lock. If this was an error * other than a missing region page, bail. */ @@ -2165,7 +2246,7 @@ check: if (size + sizeof(db_indx_t) > HEAP_FREESPACE(dbp, cp->page)) { } } - h->curpgindx = data_pgno - region_pgno - 1; + h->curpgindx = (data_pgno - region_pgno) - 1; err: DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND); if (rpage != NULL && (t_ret = __memp_fput(mpf, dbc->thread_info, rpage, dbc->priority)) != 0 && ret == 0) @@ -2187,26 +2268,40 @@ __heap_append(dbc, key, data) DBT *data, *key; { DB *dbp; - DBT tmp_dbt; + DBT tmp_dbt, data_dbt; DB_HEAP_RID rid; + DB_LSN lsn; DB_MPOOLFILE *mpf; HEAPPG *rpage; + HEAPBLOBHDR bhdr; HEAPHDR hdr; HEAP_CURSOR *cp; db_indx_t indx; db_pgno_t region_pgno; - int ret, space, t_ret; + int is_blob, ret, t_ret; + off_t blob_size; + db_seq_t blob_id; u_int8_t avail; - u_int32_t data_size; + u_int32_t data_size, space; dbp = dbc->dbp; mpf = dbp->mpf; ret = t_ret = 0; rpage = NULL; cp = (HEAP_CURSOR *)dbc->internal; + blob_size = 0; + blob_id = 0; + + if (dbp->blob_threshold && + (data->size >= dbp->blob_threshold || F_ISSET(data, DB_DBT_BLOB))) + is_blob = 1; + else + is_blob = 0; /* Need data.size + header size, 4-byte aligned. */ - if (F_ISSET(data, DB_DBT_PARTIAL)) + if (is_blob) + data_size = HEAPBLOBREC_SIZE; + else if (F_ISSET(data, DB_DBT_PARTIAL)) data_size = DB_ALIGN(data->doff + data->size + sizeof(HEAPHDR), sizeof(u_int32_t)); else @@ -2222,24 +2317,42 @@ __heap_append(dbc, key, data) goto err; indx = HEAP_FREEINDX(cp->page); - memset(&hdr, 0, sizeof(HEAPHDR)); - hdr.size = data->size; - if (F_ISSET(data, DB_DBT_PARTIAL)) - hdr.size += data->doff; - tmp_dbt.data = &hdr; - tmp_dbt.size = sizeof(HEAPHDR); + if (is_blob) { + if ((ret = __blob_put( + dbc, data, &blob_id, &blob_size, &lsn)) != 0) + goto err; + memset(&bhdr, 0, HEAPBLOBREC_SIZE); + bhdr.std_hdr.flags = HEAP_RECBLOB; + bhdr.std_hdr.size = HEAPBLOBREC_DSIZE; + SET_BLOB_SIZE(&bhdr, blob_size, HEAPBLOBHDR); + SET_BLOB_ID(&bhdr, blob_id, HEAPBLOBHDR); + SET_BLOB_FILE_ID(&bhdr, dbp->blob_file_id, HEAPBLOBHDR); + tmp_dbt.data = &bhdr; + tmp_dbt.size = sizeof(HEAPHDR); + memset(&data_dbt, 0, sizeof(DBT)); + data_dbt.data = HEAPBLOBREC_DATA((&bhdr)); + data_dbt.size = HEAPBLOBREC_DSIZE; + } else { + memset(&hdr, 0, sizeof(HEAPHDR)); + hdr.size = data->size; + if (F_ISSET(data, DB_DBT_PARTIAL)) + hdr.size += data->doff; + tmp_dbt.data = &hdr; + tmp_dbt.size = sizeof(HEAPHDR); + memcpy(&data_dbt, data, sizeof(DBT)); + } /* Log the write. */ if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), - 0, DB_ADD_HEAP, cp->pgno, (u_int32_t)indx, - data_size, &tmp_dbt, data, &LSN(cp->page))) != 0) + 0, OP_SET(DB_ADD_HEAP, cp->page), cp->pgno, (u_int32_t)indx, + data_size, &tmp_dbt, &data_dbt, &LSN(cp->page))) != 0) goto err; } else LSN_NOT_LOGGED(LSN(cp->page)); if ((ret = __heap_pitem( - dbc, (PAGE *)cp->page, indx, data_size, &tmp_dbt, data)) != 0) + dbc, (PAGE *)cp->page, indx, data_size, &tmp_dbt, &data_dbt)) != 0) goto err; rid.pgno = cp->pgno; @@ -2256,7 +2369,7 @@ __heap_append(dbc, key, data) dbc->thread_info, NULL, DB_MPOOL_DIRTY, &rpage)) != 0) goto err; - HEAP_SETSPACE(dbp, rpage, cp->pgno - region_pgno - 1, space); + HEAP_SETSPACE(dbp, rpage, (cp->pgno - region_pgno) - 1, space); } err: DB_ASSERT(dbp->env, ret != DB_PAGE_NOTFOUND); @@ -2292,8 +2405,8 @@ __heapc_split(dbc, key, data, is_first) HEAP_CURSOR *cp; db_indx_t indx; db_pgno_t region_pgno; - int ret, spacebits, t_ret; - u_int32_t buflen, doff, left, size; + int ret, t_ret; + u_int32_t buflen, doff, left, size, spacebits; u_int8_t availbits, *buf; dbp = dbc->dbp; @@ -2308,7 +2421,6 @@ __heapc_split(dbc, key, data, is_first) ret = t_ret = 0; indx = 0; buf = NULL; - buflen = 0; /* * Write the record to multiple pages, in chunks starting from the end. @@ -2322,6 +2434,9 @@ __heapc_split(dbc, key, data, is_first) left += data->doff; } hdrs.tsize = left; + buflen = 1; + if ((ret = __os_malloc(dbp->env, buflen, &buf)) != 0) + return (ret); while (left > 0) { size = DB_ALIGN(left + sizeof(HEAPSPLITHDR), sizeof(u_int32_t)); if (size < sizeof(HEAPSPLITHDR)) @@ -2336,8 +2451,10 @@ __heapc_split(dbc, key, data, is_first) else hdrs.std_hdr.flags |= HEAP_RECFIRST; - if ((ret = __heap_getpage(dbc, size, &availbits)) != 0) + if ((ret = __heap_getpage(dbc, size, &availbits)) != 0) { + __os_free(dbp->env, buf); return (ret); + } /* * size is the total number of bytes being written to the page. @@ -2363,7 +2480,7 @@ __heapc_split(dbc, key, data, is_first) size -= sizeof(db_indx_t); /* Round down to a multiple of 4. */ size = DB_ALIGN( - size - sizeof(u_int32_t) + 1, sizeof(u_int32_t)); + (size - sizeof(u_int32_t)) + 1, sizeof(u_int32_t)); DB_ASSERT(dbp->env, size >= sizeof(HEAPSPLITHDR)); hdrs.std_hdr.size = (u_int16_t)(size - sizeof(HEAPSPLITHDR)); @@ -2401,10 +2518,10 @@ __heapc_split(dbc, key, data, is_first) * page minus the bytes we're taking from data. */ t_data.data = buf; - memset(buf, '\0', t_data.size - left + doff); - buf += t_data.size - left + doff; + memset(buf, 0, (t_data.size - left) + doff); + buf += (t_data.size - left) + doff; memcpy(buf, data->data, left - doff); - doff -= t_data.size - left + doff; + doff -= (t_data.size - left) + doff; buf = t_data.data; } hdr_dbt.data = &hdrs; @@ -2415,7 +2532,8 @@ __heapc_split(dbc, key, data, is_first) if (DBC_LOGGING(dbc)) { if ((ret = __heap_addrem_log(dbp, dbc->txn, &LSN(cp->page), 0, - DB_ADD_HEAP, cp->pgno, (u_int32_t)indx, + OP_SET(DB_ADD_HEAP, cp->page), + cp->pgno, (u_int32_t)indx, size, &hdr_dbt, &t_data, &LSN(cp->page))) != 0) goto err; } else @@ -2447,7 +2565,7 @@ __heapc_split(dbc, key, data, is_first) goto err; HEAP_SETSPACE(dbp, - rpage, cp->pgno - region_pgno - 1, spacebits); + rpage, (cp->pgno - region_pgno) - 1, spacebits); ret = __memp_fput(mpf, dbc->thread_info, rpage, dbc->priority); rpage = NULL; diff --git a/src/heap/heap.src b/src/heap/heap.src index 47bd4bb0..a08ad5eb 100644 --- a/src/heap/heap.src +++ b/src/heap/heap.src @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -29,7 +29,29 @@ INCLUDE * dbt: data that is to be added or deleted. * pagelsn: former lsn of the page. */ -BEGIN addrem 49 151 +BEGIN addrem 61 151 +OP opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes u_int32_t lu +HDR hdr DBT s +DATA dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +BEGIN_COMPAT addrem 60 151 +OP opcode u_int32_t lu +DB fileid int32_t ld +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes u_int32_t lu +HDR hdr DBT s +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +BEGIN_COMPAT addrem 50 151 ARG opcode u_int32_t lu DB fileid int32_t ld ARG pgno db_pgno_t lu diff --git a/src/heap/heap_auto.c b/src/heap/heap_auto.c index 1cb705f4..9fdcce7a 100644 --- a/src/heap/heap_auto.c +++ b/src/heap/heap_auto.c @@ -9,16 +9,38 @@ #include "dbinc/txn.h" DB_LOG_RECSPEC __heap_addrem_desc[] = { - {LOGREC_ARG, SSZ(__heap_addrem_args, opcode), "opcode", "%lu"}, + {LOGREC_OP, SSZ(__heap_addrem_args, opcode), "opcode", "%lu"}, {LOGREC_DB, SSZ(__heap_addrem_args, fileid), "fileid", ""}, {LOGREC_ARG, SSZ(__heap_addrem_args, pgno), "pgno", "%lu"}, {LOGREC_ARG, SSZ(__heap_addrem_args, indx), "indx", "%lu"}, {LOGREC_ARG, SSZ(__heap_addrem_args, nbytes), "nbytes", "%lu"}, - {LOGREC_DBT, SSZ(__heap_addrem_args, hdr), "hdr", ""}, - {LOGREC_DBT, SSZ(__heap_addrem_args, dbt), "dbt", ""}, + {LOGREC_HDR, SSZ(__heap_addrem_args, hdr), "hdr", ""}, + {LOGREC_DATA, SSZ(__heap_addrem_args, dbt), "dbt", ""}, {LOGREC_POINTER, SSZ(__heap_addrem_args, pagelsn), "pagelsn", ""}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __heap_addrem_60_desc[] = { + {LOGREC_OP, SSZ(__heap_addrem_60_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__heap_addrem_60_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__heap_addrem_60_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_60_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_60_args, nbytes), "nbytes", "%lu"}, + {LOGREC_HDR, SSZ(__heap_addrem_60_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__heap_addrem_60_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__heap_addrem_60_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __heap_addrem_50_desc[] = { + {LOGREC_ARG, SSZ(__heap_addrem_50_args, opcode), "opcode", "%lu"}, + {LOGREC_DB, SSZ(__heap_addrem_50_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__heap_addrem_50_args, pgno), "pgno", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_50_args, indx), "indx", "%lu"}, + {LOGREC_ARG, SSZ(__heap_addrem_50_args, nbytes), "nbytes", "%lu"}, + {LOGREC_DBT, SSZ(__heap_addrem_50_args, hdr), "hdr", ""}, + {LOGREC_DBT, SSZ(__heap_addrem_50_args, dbt), "dbt", ""}, + {LOGREC_POINTER, SSZ(__heap_addrem_50_args, pagelsn), "pagelsn", ""}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __heap_pg_alloc_desc[] = { {LOGREC_DB, SSZ(__heap_pg_alloc_args, fileid), "fileid", ""}, {LOGREC_POINTER, SSZ(__heap_pg_alloc_args, meta_lsn), "meta_lsn", ""}, diff --git a/src/heap/heap_autop.c b/src/heap/heap_autop.c index b767203b..ac08441b 100644 --- a/src/heap/heap_autop.c +++ b/src/heap/heap_autop.c @@ -28,6 +28,40 @@ __heap_addrem_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __heap_addrem_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_addrem_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_addrem_60", __heap_addrem_60_desc, info)); +} + +/* + * PUBLIC: int __heap_addrem_50_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__heap_addrem_50_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__heap_addrem_50", __heap_addrem_50_desc, info)); +} + +/* * PUBLIC: int __heap_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ diff --git a/src/heap/heap_backup.c b/src/heap/heap_backup.c index 4588b0ba..77b0eaaa 100644 --- a/src/heap/heap_backup.c +++ b/src/heap/heap_backup.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/heap/heap_conv.c b/src/heap/heap_conv.c index 9f432d13..dbf059a4 100644 --- a/src/heap/heap_conv.c +++ b/src/heap/heap_conv.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. */ #include "db_config.h" @@ -86,7 +86,10 @@ __heap_mswap(env, pg) SWAP32(p); /* gbytes */ SWAP32(p); /* bytes */ SWAP32(p); /* region_size */ - p += 92 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* threshold */ + SWAP32(p); /* file id lo */ + SWAP32(p); /* file id hi */ + p += 89 * sizeof(u_int32_t); /* unused */ SWAP32(p); /* crypto_magic */ return (0); diff --git a/src/heap/heap_method.c b/src/heap/heap_method.c index f938b5e7..2667f4fe 100644 --- a/src/heap/heap_method.c +++ b/src/heap/heap_method.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -49,15 +49,11 @@ __heap_db_close(dbp) DB *dbp; { HEAP *h; - int ret; - - ret = 0; - if ((h = dbp->heap_internal) == NULL) - return (0); - - __os_free(dbp->env, h); - dbp->heap_internal = NULL; + if ((h = dbp->heap_internal) != NULL) { + __os_free(dbp->env, h); + dbp->heap_internal = NULL; + } return (0); } diff --git a/src/heap/heap_open.c b/src/heap/heap_open.c index 6827450d..f5bb72ae 100644 --- a/src/heap/heap_open.c +++ b/src/heap/heap_open.c @@ -1,19 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. */ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/crypto.h" #include "dbinc/db_page.h" #include "dbinc/db_swap.h" #include "dbinc/fop.h" #include "dbinc/heap.h" #include "dbinc/lock.h" -#include "dbinc/log.h" #include "dbinc/mp.h" static void __heap_init_meta __P((DB *, HEAPMETA *, db_pgno_t, DB_LSN*)); @@ -82,6 +82,7 @@ __heap_metachk(dbp, name, hm) env = dbp->env; h = (HEAP *)dbp->heap_internal; + ret = 0; /* * At this point, all we know is that the magic number is for a Heap. @@ -92,6 +93,7 @@ __heap_metachk(dbp, name, hm) M_32_SWAP(vers); switch (vers) { case 1: + case 2: break; default: __db_errx(env, @@ -116,6 +118,26 @@ __heap_metachk(dbp, name, hm) /* Set the page size. */ dbp->pgsize = hm->dbmeta.pagesize; + dbp->blob_threshold = hm->blob_threshold; + GET_BLOB_FILE_ID(env, hm, dbp->blob_file_id, ret); + if (ret != 0) + return (ret); + /* Blob databases must be upgraded. */ + if (vers == 1 && dbp->blob_file_id != 0) { + __db_errx(env, DB_STR_A("1209", +"%s: databases that support blobs must be upgraded.", "%s"), + name); + return (EINVAL); + } +#ifndef HAVE_64BIT_TYPES + if (dbp->blob_file_id != 0) { + __db_errx(env, DB_STR_A("1205", + "%s: blobs require 64 integer compiler support.", "%s"), + name); + return (EINVAL); + } +#endif + /* Copy the file's ID. */ memcpy(dbp->fileid, hm->dbmeta.uid, DB_FILE_ID_LEN); @@ -179,7 +201,8 @@ __heap_read_meta(dbp, ip, txn, meta_pgno, flags) h->region_size = meta->region_size; if (PGNO(meta) == PGNO_BASE_MD && !F_ISSET(dbp, DB_AM_RECOVER)) - __memp_set_last_pgno(mpf, meta->dbmeta.last_pgno); + (void)__memp_set_last_pgno( + mpf, meta->dbmeta.last_pgno); } else { DB_ASSERT(dbp->env, IS_RECOVERING(dbp->env) || F_ISSET(dbp, DB_AM_RECOVER)); @@ -285,6 +308,12 @@ __heap_new_file(dbp, ip, txn, fhp, name) pginfo.type = dbp->type; pdbt.data = &pginfo; pdbt.size = sizeof(pginfo); + if (dbp->blob_threshold) { + if ((ret = __blob_generate_dir_ids( + dbp, txn, &dbp->blob_file_id)) != 0) + return (ret); + + } if ((ret = __os_calloc(env, 1, dbp->pgsize, &buf)) != 0) return (ret); meta = (HEAPMETA *)buf; @@ -394,7 +423,9 @@ done: if (region != NULL && (t_ret = __memp_fput(mpf, dbc->thread_info, region, dbc->priority)) != 0 && ret == 0) ret = t_ret; - ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); + if ((t_ret = __memp_fput(mpf, + dbc->thread_info, meta, dbc->priority)) != 0 && ret == 0) + ret = t_ret; if ((t_ret = __TLPUT(dbc, meta_lock)) != 0 && ret == 0) ret = t_ret; @@ -436,4 +467,6 @@ __heap_init_meta(dbp, meta, pgno, lsnp) meta->region_size = h->region_size; meta->nregions = 1; meta->curregion = 1; + meta->blob_threshold = dbp->blob_threshold; + SET_BLOB_META_FILE_ID(meta, dbp->blob_file_id, HEAPMETA); } diff --git a/src/heap/heap_rec.c b/src/heap/heap_rec.c index 578a61c4..01803a70 100644 --- a/src/heap/heap_rec.c +++ b/src/heap/heap_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. */ #include "db_config.h" @@ -9,7 +9,6 @@ #include "db_int.h" #include "dbinc/db_page.h" #include "dbinc/heap.h" -#include "dbinc/log.h" #include "dbinc/mp.h" /* @@ -34,7 +33,8 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info) DB_THREAD_INFO *ip; PAGE *pagep, *regionp; db_pgno_t region_pgno; - int cmp_n, cmp_p, modified, oldspace, ret, space; + int cmp_n, cmp_p, modified, ret; + u_int32_t oldspace, opcode, space; ip = ((DB_TXNHEAD *)info)->thread_info; pagep = NULL; @@ -44,19 +44,20 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info) REC_FGET(mpf, ip, argp->pgno, &pagep, done); modified = 0; + opcode = OP_MODE_GET(argp->opcode); cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_HEAP) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_HEAP)) { + if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_HEAP) || + (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_HEAP)) { /* We are either redo-ing an add or undoing a delete. */ REC_DIRTY(mpf, ip, dbc->priority, &pagep); if ((ret = __heap_pitem(dbc, pagep, argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0) goto out; modified = 1; - } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_HEAP) || - (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_HEAP)) { + } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_HEAP) || + (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_HEAP)) { /* We are either undoing an add or redo-ing a delete. */ REC_DIRTY(mpf, ip, dbc->priority, &pagep); if ((ret = __heap_ditem( @@ -76,11 +77,11 @@ __heap_addrem_recover(env, dbtp, lsnp, op, info) HEAP_CALCSPACEBITS( file_dbp, HEAP_FREESPACE(file_dbp, pagep), space); oldspace = HEAP_SPACE(file_dbp, regionp, - argp->pgno - region_pgno - 1); + (argp->pgno - region_pgno) - 1); if (space != oldspace) { REC_DIRTY(mpf, ip, dbc->priority, ®ionp); HEAP_SETSPACE(file_dbp, - regionp, argp->pgno - region_pgno - 1, space); + regionp, (argp->pgno - region_pgno) - 1, space); } if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0) goto out; @@ -384,3 +385,200 @@ out: if (pagep != NULL) (void)__memp_fput(mpf, ip, pagep, dbc->priority); REC_CLOSE; } + +/* + * __heap_addrem_60_recover -- + * Recovery function for addrem. + * + * PUBLIC: int __heap_addrem_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_addrem_60_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + HEAPBLOBHDR bhdr; + HEAPHDR *hhdr; + PAGE *pagep, *regionp; + db_pgno_t region_pgno; + int cmp_n, cmp_p, modified, ret; + u_int32_t oldspace, opcode, space; + u_int8_t buf[HEAPBLOBREC_SIZE]; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__heap_addrem_60_print); + REC_INTRO(__heap_addrem_60_read, ip, 1); + region_pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + modified = 0; + opcode = OP_MODE_GET(argp->opcode); + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_HEAP) || + (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_HEAP)) { + hhdr = argp->hdr.data; + /* + * In 6.0 heap blob log records were not correctly byte + * swapped, so do the swapping here if the blob file id of the + * database does not match the blob file id stored in the + * record. Technically byte swapping the blob file id could + * produce the same value, but that would only happen in + * practice if the environment contained over 4 billion blob + * databases. 0 is an invalid blob file id. + */ + if (F_ISSET(hhdr, HEAP_RECBLOB)) { + memcpy(buf + sizeof(HEAPHDR), + argp->dbt.data, HEAPBLOBREC_DSIZE); + memcpy(&bhdr, buf, HEAPBLOBREC_SIZE); + if ((db_seq_t)bhdr.file_id != dbc->dbp->blob_file_id) { + M_64_SWAP(bhdr.id); + M_64_SWAP(bhdr.size); + M_64_SWAP(bhdr.file_id); + DB_ASSERT(env, + (db_seq_t)bhdr.file_id + == dbc->dbp->blob_file_id); + memcpy(buf, &bhdr, HEAPBLOBREC_SIZE); + memcpy(argp->dbt.data, + buf + sizeof(HEAPHDR), HEAPBLOBREC_DSIZE); + } + } + /* We are either redo-ing an add or undoing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_pitem(dbc, pagep, + argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0) + goto out; + modified = 1; + } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_HEAP) || + (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_HEAP)) { + /* We are either undoing an add or redo-ing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_ditem( + dbc, pagep, argp->indx, argp->nbytes)) != 0) + goto out; + modified = 1; + } + + if (modified) { + REC_FGET(mpf, ip, region_pgno, ®ionp, done); + if (DB_REDO(op)) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + + /* Update the available space bitmap, if necessary. */ + HEAP_CALCSPACEBITS( + file_dbp, HEAP_FREESPACE(file_dbp, pagep), space); + oldspace = HEAP_SPACE(file_dbp, regionp, + (argp->pgno - region_pgno) - 1); + if (space != oldspace) { + REC_DIRTY(mpf, ip, dbc->priority, ®ionp); + HEAP_SETSPACE(file_dbp, + regionp, (argp->pgno - region_pgno) - 1, space); + } + if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0) + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; + +} + +/* + * __heap_addrem_50_recover -- + * Recovery function for addrem. + * + * PUBLIC: int __heap_addrem_50_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__heap_addrem_50_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __heap_addrem_50_args *argp; + DB *file_dbp; + DBC *dbc; + DB_MPOOLFILE *mpf; + DB_THREAD_INFO *ip; + PAGE *pagep, *regionp; + db_pgno_t region_pgno; + int cmp_n, cmp_p, modified, ret; + u_int32_t oldspace, space; + + ip = ((DB_TXNHEAD *)info)->thread_info; + pagep = NULL; + REC_PRINT(__heap_addrem_50_print); + REC_INTRO(__heap_addrem_50_read, ip, 1); + region_pgno = HEAP_REGION_PGNO(file_dbp, argp->pgno); + + REC_FGET(mpf, ip, argp->pgno, &pagep, done); + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_HEAP) || + (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_HEAP)) { + /* We are either redo-ing an add or undoing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_pitem(dbc, pagep, + argp->indx, argp->nbytes, &argp->hdr, &argp->dbt)) != 0) + goto out; + modified = 1; + } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_HEAP) || + (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_HEAP)) { + /* We are either undoing an add or redo-ing a delete. */ + REC_DIRTY(mpf, ip, dbc->priority, &pagep); + if ((ret = __heap_ditem( + dbc, pagep, argp->indx, argp->nbytes)) != 0) + goto out; + modified = 1; + } + + if (modified) { + REC_FGET(mpf, ip, region_pgno, ®ionp, done); + if (DB_REDO(op)) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + + /* Update the available space bitmap, if necessary. */ + HEAP_CALCSPACEBITS( + file_dbp, HEAP_FREESPACE(file_dbp, pagep), space); + oldspace = HEAP_SPACE(file_dbp, + regionp, (argp->pgno - region_pgno) - 1); + if (space != oldspace) { + REC_DIRTY(mpf, ip, dbc->priority, ®ionp); + HEAP_SETSPACE(file_dbp, + regionp, (argp->pgno - region_pgno) - 1, space); + } + if ((ret = __memp_fput(mpf, ip, regionp, dbc->priority)) != 0) + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (pagep != NULL) + (void)__memp_fput(mpf, ip, pagep, dbc->priority); + REC_CLOSE; +} diff --git a/src/heap/heap_reclaim.c b/src/heap/heap_reclaim.c index 8cedb223..463e40c0 100644 --- a/src/heap/heap_reclaim.c +++ b/src/heap/heap_reclaim.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1998, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -50,7 +50,7 @@ __heap_truncate(dbc, countp) return (ret); if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &meta)) != 0) { - __TLPUT(dbc, lock); + (void)__TLPUT(dbc, lock); goto err; } diff --git a/src/heap/heap_stat.c b/src/heap/heap_stat.c index 9f4361a7..13bd36a2 100644 --- a/src/heap/heap_stat.c +++ b/src/heap/heap_stat.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -29,7 +29,7 @@ __heap_stat(dbc, spp, flags) { DB *dbp; DB_HEAP_STAT *sp; - DB_LOCK lock, metalock; + DB_LOCK metalock; DB_MPOOLFILE *mpf; ENV *env; HEAPMETA *meta; @@ -41,7 +41,6 @@ __heap_stat(dbc, spp, flags) meta = NULL; LOCK_INIT(metalock); - LOCK_INIT(lock); mpf = dbp->mpf; sp = NULL; ret = t_ret = write_meta = 0; @@ -147,6 +146,8 @@ __heap_stat_print(dbc, flags) "Underlying database page size", (u_long)sp->heap_pagesize); __db_dl(env, "Number of records in the database", (u_long)sp->heap_nrecs); + __db_dl(env, + "Number of blobs in the database", (u_long)sp->heap_nblobs); __db_dl(env, "Number of database pages", (u_long)sp->heap_pagecnt); __db_dl(env, "Number of database regions", (u_long)sp->heap_nregions); __db_dl(env, @@ -200,11 +201,13 @@ __heap_stat_callback(dbc, h, cookie, putp) * We can't just use NUM_ENT, otherwise we'd mis-count split * records. */ - for (i = 0; i < NUM_ENT(h); i++) { + for (i = 0; i <= HEAP_HIGHINDX(h); i++) { hdr = (HEAPHDR *)P_ENTRY(dbp, h, i); if (!F_ISSET(hdr, HEAP_RECSPLIT) || F_ISSET(hdr, HEAP_RECFIRST)) sp->heap_nrecs++; + if (F_ISSET(hdr, HEAP_RECBLOB)) + sp->heap_nblobs++; } break; case P_HEAPMETA: /* Fallthrough */ diff --git a/src/heap/heap_stub.c b/src/heap/heap_stub.c index b4feb2f3..3093abc2 100644 --- a/src/heap/heap_stub.c +++ b/src/heap/heap_stub.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id: */ @@ -35,6 +35,40 @@ __db_no_heap_am(env) } int +__heap_60_heapmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int +__heap_60_heap(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(flags, 0); + COMPQUIET(fhp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(dirtyp, NULL); + return (__db_no_heap_am(dbp->env)); +} + +int __heap_db_create(dbp) DB *dbp; { diff --git a/src/heap/heap_upgrade.c b/src/heap/heap_upgrade.c new file mode 100644 index 00000000..35fa78b9 --- /dev/null +++ b/src/heap/heap_upgrade.c @@ -0,0 +1,106 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/blob.h" +#include "dbinc/db_page.h" +#include "dbinc/heap.h" +#include "dbinc/db_upgrade.h" + +/* + * __heap_60_heapmeta-- + * Upgrade the version number. + * + * PUBLIC: int __heap_60_heapmeta + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__heap_60_heapmeta(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HEAPMETA *hmeta; + + COMPQUIET(flags, 0); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(dbp, NULL); + hmeta = (HEAPMETA *)h; + + hmeta->dbmeta.version = 2; + *dirtyp = 1; + + return (0); +} + +/* + * __heap_60_heap -- + * Upgrade the blob records on the database heap pages. + * + * PUBLIC: int __heap_60_heap + * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); + */ +int +__heap_60_heap(dbp, real_name, flags, fhp, h, dirtyp) + DB *dbp; + char *real_name; + u_int32_t flags; + DB_FH *fhp; + PAGE *h; + int *dirtyp; +{ + HEAPBLOBHDR60 hb60; + HEAPBLOBHDR60P1 hb60p1; + HEAPHDR *hdr; + db_seq_t blob_id, blob_size, file_id; + db_indx_t indx, *offtbl; + int ret; + + COMPQUIET(flags, 0); + COMPQUIET(real_name, NULL); + COMPQUIET(fhp, NULL); + offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, h); + ret = 0; + + DB_ASSERT(dbp->env, HEAPBLOBREC60_SIZE == HEAPBLOBREC_SIZE); + for (indx = 0; indx <= HEAP_HIGHINDX(h); indx++) { + if (offtbl[indx] == 0) + continue; + hdr = (HEAPHDR *)P_ENTRY(dbp, h, indx); + if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&hb60, hdr, HEAPBLOBREC60_SIZE); + memset(&hb60p1, 0, HEAPBLOBREC_SIZE); + hb60p1.std_hdr.flags = hb60.flags; + hb60p1.std_hdr.size = hb60.size; + hb60p1.encoding = hb60.encoding; + hb60p1.lsn = hb60.lsn; + GET_BLOB60_ID(dbp->env, hb60, blob_id, ret); + if (ret != 0) + return (ret); + GET_BLOB60_SIZE(dbp->env, hb60, blob_size, ret); + if (ret != 0) + return (ret); + GET_BLOB60_FILE_ID(dbp->env, &hb60, file_id, ret); + if (ret != 0) + return (ret); + SET_BLOB_ID(&hb60p1, blob_id, HEAPBLOBHDR60P1); + SET_BLOB_SIZE(&hb60p1, blob_size, HEAPBLOBHDR60P1); + SET_BLOB_FILE_ID(&hb60p1, file_id, HEAPBLOBHDR60P1); + memcpy(hdr, &hb60p1, HEAPBLOBREC_SIZE); + *dirtyp = 1; + } + } + + return (ret); +} diff --git a/src/heap/heap_verify.c b/src/heap/heap_verify.c index ea15c28b..7c90caf0 100644 --- a/src/heap/heap_verify.c +++ b/src/heap/heap_verify.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -9,13 +9,14 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/db_page.h" #include "dbinc/db_verify.h" #include "dbinc/heap.h" #include "dbinc/lock.h" #include "dbinc/mp.h" -static int __heap_safe_gsplit __P((DB *, VRFY_DBINFO *, PAGE *, db_indx_t, +static int __heap_safe_gsplit __P((DB *, VRFY_DBINFO *, PAGE *, unsigned, DBT *)); static int __heap_verify_offset_cmp __P((const void *, const void *)); @@ -37,7 +38,8 @@ __heap_vrfy_meta(dbp, vdp, meta, pgno, flags) HEAP *h; VRFY_PAGEINFO *pip; db_pgno_t last_pgno, max_pgno, npgs; - int isbad, ret; + int isbad, ret, t_ret; + db_seq_t blob_id; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); @@ -97,8 +99,40 @@ __heap_vrfy_meta(dbp, vdp, meta, pgno, flags) "%lu"), (u_long)pgno)); isbad = 1; } + h->gbytes = meta->gbytes; + h->bytes = meta->bytes; } +/* + * Where 64-bit integer support is not available, + * return an error if the file has any blobs. + */ + t_ret = 0; +#ifdef HAVE_64BIT_TYPES + GET_BLOB_FILE_ID(dbp->env, meta, blob_id, t_ret); + if (t_ret != 0) { + isbad = 1; + EPRINT((dbp->env, DB_STR_A("1173", + "Page %lu: blob file id overflow.", "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } +#else /* HAVE_64BIT_TYPES */ + /* + * db_seq_t is an int on systems that do not have 64 integers types, so + * this will compile and run. + */ + GET_BLOB_FILE_ID(env, meta, blob_id, t_ret); + if (t_ret != 0 || blob_id != 0) { + isbad = 1; + EPRINT((env, DB_STR_A("1206", + "Page %lu: blobs require 64 integer compiler support.", + "%lu"), (u_long)pgno)); + if (ret == 0) + ret = t_ret; + } +#endif + err: if (LF_ISSET(DB_SALVAGE)) ret = __db_salvage_markdone(vdp, pgno); @@ -120,12 +154,16 @@ __heap_vrfy(dbp, vdp, h, pgno, flags) db_pgno_t pgno; u_int32_t flags; { + HEAPBLOBHDR bhdr; HEAPHDR *hdr; - int cnt, i, j, ret; + int i, j, ret; + off_t blob_size; + db_seq_t blob_id, file_id; db_indx_t *offsets, *offtbl, end; + u_int32_t cnt; if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) - goto err; + return (ret); if (TYPE(h) == P_IHEAP) /* Nothing to verify on a region page. */ @@ -140,7 +178,7 @@ __heap_vrfy(dbp, vdp, h, pgno, flags) /* * Build a sorted list of all the offsets in the table. Entries in the * offset table are not always sorted. While we're here, check that - * flags are sane. + * flags are sane, and that the blob entries are sane. */ cnt = 0; for (i = 0; i <= HEAP_HIGHINDX(h); i++) { @@ -164,6 +202,36 @@ __heap_vrfy(dbp, vdp, h, pgno, flags) ret = DB_VERIFY_BAD; goto err; } + if (F_ISSET(hdr, HEAP_RECBLOB)) { + /* + * Check that the blob file exists and is the same + * file size as is stored in the database record. + */ + memcpy(&bhdr, hdr, sizeof(HEAPBLOBHDR)); + blob_id = (db_seq_t)bhdr.id; + GET_BLOB_SIZE(dbp->env, bhdr, blob_size, ret); + if (ret != 0 || blob_size < 0) { + EPRINT((dbp->env, DB_STR_A("1175", + "Page %lu: blob file size value has overflowed", + "%lu"), (u_long)pgno)); + ret = DB_VERIFY_BAD; + goto err; + } + file_id = (db_seq_t)bhdr.file_id; + if (file_id == 0) { + EPRINT((dbp->env, DB_STR_A("1177", + "Page %lu: invalid blob dir id %llu at item %lu", + "%lu %llu, %lu"), (u_long)pgno, + (unsigned long long)file_id, (u_long)i)); + ret = DB_VERIFY_BAD; + goto err; + } + if ((ret = __blob_vrfy(dbp->env, blob_id, + blob_size, file_id, 0, pgno, flags)) != 0) { + ret = DB_VERIFY_BAD; + goto err; + } + } offsets[cnt] = offtbl[i]; cnt++; @@ -180,7 +248,7 @@ __heap_vrfy(dbp, vdp, h, pgno, flags) * record. We can't use the P_ENTRY macro because we've kept track of * the offsets, not the indexes. */ - for (i = 0; i < cnt - 1; i++) { + for (i = 0; i < (int)cnt - 1; i++) { hdr = (HEAPHDR *)((u_int8_t *)h + offsets[i]); end = offsets[i] + HEAP_HDRSIZE(hdr) + hdr->size; if (end > offsets[i+1]) { @@ -328,12 +396,22 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags) u_int32_t flags; { DBT dbt; + ENV *env; HEAPHDR *hdr; + HEAPBLOBHDR bhdr; db_indx_t i, *offtbl; + char *prefix; int err_ret, ret, t_ret; + off_t blob_size, blob_offset, remaining; + u_int32_t blob_buf_size; + u_int8_t *blob_buf; + db_seq_t blob_id, file_id; COMPQUIET(flags, 0); memset(&dbt, 0, sizeof(DBT)); + blob_buf = NULL; + blob_buf_size = 0; + env = dbp->env; offtbl = (db_indx_t *)HEAP_OFFSETTBL(dbp, h); err_ret = ret = t_ret = 0; @@ -357,9 +435,74 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags) if (dbt.size > dbp->pgsize * 4) dbt.size = dbp->pgsize * 4; if ((ret = - __os_malloc(dbp->env, dbt.size, &dbt.data)) != 0) + __os_malloc(env, dbt.size, &dbt.data)) != 0) goto err; - __heap_safe_gsplit(dbp, vdp, h, i, &dbt); + if ((ret = __heap_safe_gsplit + (dbp, vdp, h, i, &dbt)) != 0) { + err_ret = ret; + __os_free(env, dbt.data); + continue; + } + } else if (F_ISSET(hdr, HEAP_RECBLOB)) { + memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE); + blob_id = (db_seq_t)bhdr.id; + GET_BLOB_SIZE(env, bhdr, blob_size, ret); + if (ret != 0 || blob_size < 0) + goto err; + file_id = (db_seq_t)bhdr.file_id; + /* Read the blob, in pieces if it is too large.*/ + blob_offset = 0; + if (blob_size > MEGABYTE) { + if (blob_buf_size < MEGABYTE) { + if ((ret = __os_realloc( + env, MEGABYTE, &blob_buf)) != 0) + goto err; + blob_buf_size = MEGABYTE; + } + } else if (blob_buf_size < blob_size) { + blob_buf_size = (u_int32_t)blob_size; + if ((ret = __os_realloc( + env, blob_buf_size, &blob_buf)) != 0) + goto err; + } + dbt.data = blob_buf; + dbt.ulen = blob_buf_size; + remaining = blob_size; + prefix = " "; + do { + if ((ret = __blob_salvage(env, blob_id, + blob_offset, + ((remaining < blob_buf_size) ? + (size_t)remaining : blob_buf_size), + file_id, 0, &dbt)) != 0) { + if (LF_ISSET(DB_AGGRESSIVE)) { + ret = DB_VERIFY_BAD; + break; + } + F_CLR(vdp, SALVAGE_STREAM_BLOB); + goto err; + } + if (remaining > blob_buf_size) + F_SET(vdp, SALVAGE_STREAM_BLOB); + else + F_CLR(vdp, SALVAGE_STREAM_BLOB); + if ((t_ret = __db_vrfy_prdbt( + &dbt, 0, prefix, handle, + callback, 0, 0, vdp)) != 0) { + if (ret == 0) + ret = t_ret; + F_CLR(vdp, SALVAGE_STREAM_BLOB); + goto err; + } + prefix = NULL; + blob_offset += dbt.size; + if (remaining < blob_buf_size) + remaining = 0; + else + remaining -= blob_buf_size; + } while (remaining > 0); + F_CLR(vdp, SALVAGE_STREAM_BLOB); + continue; } else { dbt.data = (u_int8_t *)hdr + HEAP_HDRSIZE(hdr); dbt.size = hdr->size; @@ -369,11 +512,13 @@ __heap_salvage(dbp, vdp, pgno, h, handle, callback, flags) 0, " ", handle, callback, 0, 0, vdp)) != 0) err_ret = ret; if (F_ISSET(hdr, HEAP_RECSPLIT)) - __os_free(dbp->env, dbt.data); + __os_free(env, dbt.data); } err: if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) return (t_ret); + if (blob_buf != NULL) + __os_free(env, blob_buf); return ((ret == 0 && err_ret != 0) ? err_ret : ret); } @@ -386,7 +531,7 @@ __heap_safe_gsplit(dbp, vdp, h, i, dbt) DB *dbp; VRFY_DBINFO *vdp; PAGE *h; - db_indx_t i; + unsigned i; DBT *dbt; { DB_MPOOLFILE *mpf; @@ -433,7 +578,7 @@ __heap_safe_gsplit(dbp, vdp, h, i, dbt) err: if (gotpg && (t_ret = __memp_fput( mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) - t_ret = ret; + ret = t_ret; return (ret); } |