summaryrefslogtreecommitdiff
path: root/src/db
diff options
context:
space:
mode:
Diffstat (limited to 'src/db')
-rw-r--r--src/db/crdel.src2
-rw-r--r--src/db/crdel_rec.c4
-rw-r--r--src/db/db.c46
-rw-r--r--src/db/db.src2
-rw-r--r--src/db/db_am.c18
-rw-r--r--src/db/db_backup.c169
-rw-r--r--src/db/db_cam.c367
-rw-r--r--src/db/db_cds.c22
-rw-r--r--src/db/db_compact.c72
-rw-r--r--src/db/db_conv.c145
-rw-r--r--src/db/db_copy.c2
-rw-r--r--src/db/db_dispatch.c10
-rw-r--r--src/db/db_dup.c2
-rw-r--r--src/db/db_iface.c55
-rw-r--r--src/db/db_join.c9
-rw-r--r--src/db/db_meta.c16
-rw-r--r--src/db/db_method.c225
-rw-r--r--src/db/db_open.c142
-rw-r--r--src/db/db_overflow.c187
-rw-r--r--src/db/db_ovfl_vrfy.c2
-rw-r--r--src/db/db_pr.c343
-rw-r--r--src/db/db_rec.c10
-rw-r--r--src/db/db_reclaim.c3
-rw-r--r--src/db/db_remove.c28
-rw-r--r--src/db/db_rename.c7
-rw-r--r--src/db/db_ret.c122
-rw-r--r--src/db/db_setid.c2
-rw-r--r--src/db/db_setlsn.c2
-rw-r--r--src/db/db_sort_multiple.c8
-rw-r--r--src/db/db_stati.c2
-rw-r--r--src/db/db_truncate.c6
-rw-r--r--src/db/db_upg.c122
-rw-r--r--src/db/db_upg_opd.c7
-rw-r--r--src/db/db_vrfy.c19
-rw-r--r--src/db/db_vrfy_stub.c2
-rw-r--r--src/db/db_vrfyutil.c11
-rw-r--r--src/db/partition.c292
37 files changed, 2088 insertions, 395 deletions
diff --git a/src/db/crdel.src b/src/db/crdel.src
index 70473899..a1cbc0ed 100644
--- a/src/db/crdel.src
+++ b/src/db/crdel.src
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/crdel_rec.c b/src/db/crdel_rec.c
index 08e7bae8..2c529627 100644
--- a/src/db/crdel_rec.c
+++ b/src/db/crdel_rec.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -81,7 +81,7 @@ __crdel_metasub_recover(env, dbtp, lsnp, op, info)
/*
* If this was an in-memory database and we are re-creating
* and this is the meta-data page, then we need to set up a
- * bunch of fields in the dbo as well.
+ * bunch of fields in the dbp as well.
*/
if (F_ISSET(file_dbp, DB_AM_INMEM) &&
argp->pgno == PGNO_BASE_MD &&
diff --git a/src/db/db.c b/src/db/db.c
index 0d9d1e6e..ffeb6d2b 100644
--- a/src/db/db.c
+++ b/src/db/db.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -41,6 +41,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc_auto/sequence_ext.h"
#include "dbinc/db_page.h"
#include "dbinc/db_swap.h"
#include "dbinc/btree.h"
@@ -92,6 +93,9 @@ __db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0)
return (ret);
+ /* Set the creation directory. */
+ dbp->dirname = subdbp->dirname;
+
/*
* It's always a btree.
* Run in the transaction we've created.
@@ -105,6 +109,20 @@ __db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));
/*
+ * If creating the master database, disable blobs, but assign it a
+ * blob file id if blobs are enabled in the subdatabase. This means
+ * that subdatabases can only support blobs if the first subdatabase
+ * supports blobs. This is a temporary restriction, but is needed at
+ * the moment to prevent an infinite loop.
+ */
+ dbp->blob_threshold = 0;
+ if (LF_ISSET(DB_CREATE) && subdbp->blob_threshold != 0) {
+ if ((ret = __blob_generate_dir_ids(
+ dbp, txn, &dbp->blob_file_id)) != 0)
+ return (ret);
+ }
+
+ /*
* If there was a subdb specified, then we only want to apply
* DB_EXCL to the subdb, not the actual file. We only got here
* because there was a subdb specified.
@@ -819,6 +837,21 @@ __db_refresh(dbp, txn, flags, deferred_closep, reuse)
if (dbp->mpf == NULL)
LF_SET(DB_NOSYNC);
+#ifdef HAVE_64BIT_TYPES
+ /* Close the blob meta data databases. */
+ if (dbp->blob_seq != NULL) {
+ if ((t_ret = __seq_close(dbp->blob_seq, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ dbp->blob_seq = NULL;
+ }
+ if (dbp->blob_meta_db != NULL) {
+ if ((t_ret = __db_close(
+ dbp->blob_meta_db, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ dbp->blob_meta_db = NULL;
+ }
+#endif
+
/* If never opened, or not currently open, it's easy. */
if (!F_ISSET(dbp, DB_AM_OPEN_CALLED))
goto never_opened;
@@ -1164,6 +1197,10 @@ never_opened:
__os_free(dbp->env, dbp->dname);
dbp->dname = NULL;
}
+ if (dbp->blob_sub_dir != NULL) {
+ __os_free(dbp->env, dbp->blob_sub_dir);
+ dbp->blob_sub_dir = NULL;
+ }
/* Discard any memory used to store returned data. */
if (dbp->my_rskey.data != NULL)
@@ -1235,8 +1272,11 @@ __db_disassociate(sdbp)
sdbp->s_refcnt = 0;
while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
- if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0)
- ret = t_ret;
+ if ((t_ret = __dbc_destroy(dbc)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ break;
+ }
F_CLR(sdbp, DB_AM_SECONDARY);
return (ret);
diff --git a/src/db/db.src b/src/db/db.src
index 879c7856..4a90ac16 100644
--- a/src/db/db.src
+++ b/src/db/db.src
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_am.c b/src/db/db_am.c
index 1cf3a505..84bb04bb 100644
--- a/src/db/db_am.c
+++ b/src/db/db_am.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -205,6 +205,7 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
/* Refresh the DBC structure. */
dbc->dbtype = dbtype;
RESET_RET_MEM(dbc);
+ dbc->db_stream = __dbc_db_stream;
dbc->set_priority = __dbc_set_priority;
dbc->get_priority = __dbc_get_priority;
dbc->priority = dbp->priority;
@@ -314,11 +315,11 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
if (F2_ISSET(dbp, DB2_AM_EXCL)) {
F_SET(dbc, DBC_DONTLOCK);
if (IS_REAL_TXN(txn)&& !LF_ISSET(DBC_OPD | DBC_DUPLICATE)) {
- /*
- * Exclusive databases can only have one active
- * transaction at a time since there are no internal
+ /*
+ * Exclusive databases can only have one active
+ * transaction at a time since there are no internal
* locks to prevent one transaction from reading and
- * writing another's uncommitted changes.
+ * writing another's uncommitted changes.
*/
if (dbp->cur_txn != NULL && dbp->cur_txn != txn) {
__db_errx(env, DB_STR("0749",
@@ -332,7 +333,7 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
memset(&req, 0, sizeof(req));
req.lock = dbp->handle_lock;
req.op = DB_LOCK_TRADE;
- if ((ret = __lock_vec(env, txn->locker, 0,
+ if ((ret = __lock_vec(env, txn->locker, 0,
&req, 1, 0)) != 0)
goto err;
dbp->cur_txn = txn;
@@ -397,10 +398,11 @@ __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
if (ip != NULL) {
dbc->thread_info = ip;
#ifdef DIAGNOSTIC
- if (dbc->locker != NULL)
+ if (dbc->locker != NULL) {
+ dbc->locker->prev_locker = ip->dbth_locker;
ip->dbth_locker =
R_OFFSET(&(env->lk_handle->reginfo), dbc->locker);
- else
+ } else
ip->dbth_locker = INVALID_ROFF;
#endif
} else if (txn != NULL)
diff --git a/src/db/db_backup.c b/src/db/db_backup.c
index 66d7382a..1c72e4d7 100644
--- a/src/db/db_backup.c
+++ b/src/db/db_backup.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -24,8 +24,9 @@ static int backup_read_data_dir
__P((DB_ENV *, DB_THREAD_INFO *, const char *, const char *, u_int32_t));
static int backup_dir_clean
__P((DB_ENV *, const char *, const char *, int *, u_int32_t));
-static int backup_data_copy
- __P((DB_ENV *, const char *, const char *, const char *, int));
+static int backup_lgconf_chk __P((DB_ENV *));
+static int __db_backup
+ __P((DB_ENV *, const char *, DB_THREAD_INFO *, int, u_int32_t));
/*
* __db_dbbackup_pp --
@@ -47,9 +48,9 @@ __db_dbbackup_pp(dbenv, dbfile, target, flags)
"DB_ENV->dbbackup", flags, DB_EXCL)) != 0)
return (ret);
ENV_ENTER(dbenv->env, ip);
-
- ret = __db_dbbackup(dbenv, ip, dbfile, target, flags);
-
+ REPLICATION_WRAP(dbenv->env,
+ (__db_dbbackup(
+ dbenv, ip, dbfile, target, flags, 0, NULL)), 0, ret);
ENV_LEAVE(dbenv->env, ip);
return (ret);
}
@@ -58,15 +59,17 @@ __db_dbbackup_pp(dbenv, dbfile, target, flags)
* __db_dbbackup --
* Copy a database file coordinated with mpool.
*
- * PUBLIC: int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *,
- * PUBLIC: const char *, const char *, u_int32_t));
+ * PUBLIC: int __db_dbbackup __P((DB_ENV *, DB_THREAD_INFO *, const char *,
+ * PUBLIC: const char *, u_int32_t, u_int32_t, const char *));
*/
int
-__db_dbbackup(dbenv, ip, dbfile, target, flags)
+__db_dbbackup(dbenv, ip, dbfile, target, flags, oflags, full_path)
DB_ENV *dbenv;
DB_THREAD_INFO *ip;
const char *dbfile, *target;
u_int32_t flags;
+ u_int32_t oflags;
+ const char *full_path;
{
DB *dbp;
DB_FH *fp;
@@ -77,8 +80,8 @@ __db_dbbackup(dbenv, ip, dbfile, target, flags)
retry_count = 0;
retry: if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
- (ret = __db_open(dbp, ip, NULL, dbfile, NULL,
- DB_UNKNOWN, DB_AUTO_COMMIT | DB_RDONLY, 0, PGNO_BASE_MD)) != 0) {
+ (ret = __db_open(dbp, ip, NULL, dbfile, NULL, DB_UNKNOWN,
+ DB_AUTO_COMMIT | DB_RDONLY | oflags, 0, PGNO_BASE_MD)) != 0) {
if (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED) {
(void)__db_close(dbp, NULL, DB_NOSYNC);
dbp = NULL;
@@ -91,9 +94,16 @@ retry: if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
}
}
+ /* Hot backup requires DB_LOG_BLOB. */
+ if (ret == 0 && dbp->blob_threshold != 0 &&
+ (ret = backup_lgconf_chk(dbenv)) != 0)
+ goto err;
+
+ if (full_path == NULL)
+ full_path = dbfile;
if (ret == 0) {
if ((ret = __memp_backup_open(dbenv->env,
- dbp->mpf, dbfile, target, flags, &fp, &handle)) == 0) {
+ dbp->mpf, full_path, target, flags, &fp, &handle)) == 0) {
if (dbp->type == DB_HEAP)
ret = __heap_backup(
dbenv, dbp, ip, fp, handle, flags);
@@ -104,10 +114,21 @@ retry: if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
fp, handle, flags);
}
if ((t_ret = __memp_backup_close(dbenv->env,
- dbp->mpf, dbfile, fp, handle)) != 0 && ret == 0)
+ dbp->mpf, full_path, fp, handle)) != 0 && ret == 0)
ret = t_ret;
}
+ /*
+ * Copy blob files. Since no locking is done here, it is possible
+ * that a blob file may be copied in the middle of being written.
+ * This is not a problem since hotbackup requires DB_LOG_BLOB and
+ * catastrophic recovery, which will fix any inconsistencies in the
+ * blob files.
+ */
+ if (ret == 0 && dbp->blob_threshold != 0 &&
+ (t_ret = __blob_copy_all(dbp, target, flags)) != 0)
+ ret= t_ret;
+
#ifdef HAVE_QUEUE
/*
* For compatibility with the 5.2 and patch versions of db_copy
@@ -117,7 +138,7 @@ retry: if ((ret = __db_create_internal(&dbp, dbenv->env, 0)) == 0 &&
ret = __qam_backup_extents(dbp, ip, target, flags);
#endif
- if (dbp != NULL &&
+err: if (dbp != NULL &&
(t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
ret = t_ret;
@@ -205,8 +226,11 @@ backup_dir_clean(dbenv, backup_dir, log_dir, remove_maxp, flags)
/*
* backup_data_copy --
* Copy a non-database file into the backup directory.
+ *
+ * PUBLIC: int backup_data_copy __P((
+ * PUBLIC: DB_ENV *, const char *, const char *, const char *, int));
*/
-static int
+int
backup_data_copy(dbenv, file, from_dir, to_dir, log)
DB_ENV *dbenv;
const char *file, *from_dir, *to_dir;
@@ -352,13 +376,16 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
ENV *env;
FILE *savefile;
int fcnt, ret;
- size_t cnt;
+ size_t cnt, len;
const char *bd;
char **names, buf[DB_MAXPATHLEN], bbuf[DB_MAXPATHLEN];
+ char fullpath[DB_MAXPATHLEN];
void (*savecall) (const DB_ENV *, const char *, const char *);
env = dbenv->env;
memset(bbuf, 0, sizeof(bbuf));
+ memset(fullpath, 0, sizeof(fullpath));
+ len = 0;
bd = backup_dir;
if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) && dir != env->db_home) {
@@ -401,6 +428,12 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
"%s: path too long", "%s"), buf);
return (EINVAL);
}
+ /* Save the original dir. */
+ if (!LF_ISSET(DB_BACKUP_SINGLE_DIR)) {
+ (void)snprintf(fullpath, sizeof(fullpath),
+ "%s%c%c", dir, PATH_SEPARATOR[0], '\0');
+ len = strlen(fullpath);
+ }
dir = buf;
}
/* Get a list of file names. */
@@ -449,7 +482,16 @@ backup_read_data_dir(dbenv, ip, dir, backup_dir, flags)
savefile = dbenv->db_errfile;
dbenv->db_errfile = NULL;
- ret = __db_dbbackup(dbenv, ip, names[cnt], bd, flags);
+ /*
+ * If it is not backing up to a single directory, prefix
+ * the file with 'dir' so that the file and directory structure
+ * in the source and backup location will be the same.
+ */
+ if (len != 0)
+ (void)snprintf(fullpath + len,
+ sizeof(fullpath) - len, "%s%c", names[cnt], '\0');
+ ret = __db_dbbackup(dbenv, ip, names[cnt],
+ backup_dir, flags, 0, len != 0 ? fullpath : NULL);
dbenv->db_errcall = savecall;
dbenv->db_errfile = savefile;
@@ -662,21 +704,22 @@ err: if (logd != dbenv->db_log_dir && logd != env->db_home)
* __db_backup --
* Backup databases in the enviornment.
*
- * PUBLIC: int __db_backup __P((DB_ENV *, const char *, u_int32_t));
+ * PUBLIC: int __db_backup_pp __P((DB_ENV *, const char *, u_int32_t));
*/
int
-__db_backup(dbenv, target, flags)
+__db_backup_pp(dbenv, target, flags)
DB_ENV *dbenv;
const char *target;
u_int32_t flags;
{
DB_THREAD_INFO *ip;
ENV *env;
- int copy_min, remove_max, ret;
- char **dir;
+ u_int32_t bytes;
+ int remove_max, ret;
env = dbenv->env;
- remove_max = copy_min = 0;
+ bytes = 0;
+ remove_max = 0;
#undef OKFLAGS
#define OKFLAGS \
@@ -692,6 +735,11 @@ __db_backup(dbenv, target, flags)
return (EINVAL);
}
+ /* Hot backup requires DB_LOG_BLOB. */
+ if ((ret = __env_get_blob_threshold_int(env, &bytes)) != 0 ||
+ (bytes != 0 && (ret = backup_lgconf_chk(dbenv)) != 0))
+ return (ret);
+
/*
* If the target directory for the backup does not exist, create it
* with mode read-write-execute for the owner. Ignore errors here,
@@ -714,6 +762,30 @@ __db_backup(dbenv, target, flags)
}
ENV_ENTER(env, ip);
+ REPLICATION_WRAP(env,
+ (__db_backup(dbenv, target, ip, remove_max, flags)), 0, ret);
+ ENV_LEAVE(env, ip);
+ return (ret);
+}
+
+/*
+ * __db_backup --
+ * Backup databases in the environment.
+ */
+static int
+__db_backup(dbenv, target, ip, remove_max, flags)
+ DB_ENV *dbenv;
+ const char *target;
+ DB_THREAD_INFO *ip;
+ int remove_max;
+ u_int32_t flags;
+{
+ ENV *env;
+ int copy_min, ret;
+ char **dir;
+
+ env = dbenv->env;
+ copy_min = 0;
/*
* If the UPDATE option was not specified, copy all database
@@ -724,6 +796,19 @@ __db_backup(dbenv, target, flags)
goto end;
F_SET(dbenv, DB_ENV_HOTBACKUP);
if (!LF_ISSET(DB_BACKUP_UPDATE)) {
+ /*
+ * Don't allow absolute path of blob directory when
+ * it is not backing up to a single directory.
+ */
+ if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+ dbenv->db_blob_dir != NULL &&
+ __os_abspath(dbenv->db_blob_dir)) {
+ __db_errx(env, DB_STR_A("0780",
+"blob directory '%s' is absolute path, not permitted unless backup is to a single directory",
+ "%s"), dbenv->db_blob_dir);
+ ret = EINVAL;
+ goto err;
+ }
if ((ret = backup_read_data_dir(dbenv,
ip, env->db_home, target, flags)) != 0)
goto err;
@@ -734,8 +819,8 @@ __db_backup(dbenv, target, flags)
* enviroment -- running recovery with them would
* corrupt the source files.
*/
- if (!LF_ISSET(DB_BACKUP_SINGLE_DIR)
- && __os_abspath(*dir)) {
+ if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+ __os_abspath(*dir)) {
__db_errx(env, DB_STR_A("0725",
"data directory '%s' is absolute path, not permitted unless backup is to a single directory",
"%s"), *dir);
@@ -751,7 +836,17 @@ __db_backup(dbenv, target, flags)
/*
* Copy all log files found in the log directory.
* The log directory defaults to the home directory.
+ * Don't allow absolute path of log directory when
+ * it is not backing up to a single directory.
*/
+ if (!LF_ISSET(DB_BACKUP_SINGLE_DIR) &&
+ dbenv->db_log_dir != NULL && __os_abspath(dbenv->db_log_dir)) {
+ __db_errx(env, DB_STR_A("0781",
+"log directory '%s' is absolute path, not permitted unless backup is to a single directory",
+ "%s"), dbenv->db_log_dir);
+ ret = EINVAL;
+ goto err;
+ }
if ((ret = backup_read_log_dir(dbenv, target, &copy_min, flags)) != 0)
goto err;
/*
@@ -761,7 +856,7 @@ __db_backup(dbenv, target, flags)
* cleanup.
*/
if (LF_ISSET(DB_BACKUP_UPDATE) && remove_max < copy_min &&
- !(remove_max == 0 && copy_min == 1)) {
+ remove_max != 0 && copy_min != 1) {
__db_errx(env, DB_STR_A("0743",
"the largest log file removed (%d) must be greater than or equal the smallest log file copied (%d)",
"%d %d"), remove_max, copy_min);
@@ -770,6 +865,28 @@ __db_backup(dbenv, target, flags)
err: F_CLR(dbenv, DB_ENV_HOTBACKUP);
(void)__env_set_backup(env, 0);
-end: ENV_LEAVE(env, ip);
+end: return (ret);
+}
+
+/*
+ * backup_lgconf_chk --
+ * When logging is on, return an error unless DB_LOG_BLOB is configured.
+ */
+static int
+backup_lgconf_chk(dbenv)
+ DB_ENV *dbenv;
+{
+ int lgconf, ret;
+
+ ret = 0;
+
+ if (LOGGING_ON(dbenv->env) && ((ret = __log_get_config(dbenv,
+ DB_LOG_BLOB, &lgconf)) != 0 || lgconf == 0)) {
+ __db_errx(dbenv->env, DB_STR("0782",
+ "Hot backup requires DB_LOG_BLOB"));
+ if (ret == 0) /* lookup worked, but DB_LOG_BLOB is off */
+ ret = EINVAL;
+ }
+
return (ret);
}
diff --git a/src/db/db_cam.c b/src/db/db_cam.c
index 6ee8b579..1a330bdb 100644
--- a/src/db/db_cam.c
+++ b/src/db/db_cam.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -11,6 +11,7 @@
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
+#include "dbinc/fop.h"
#include "dbinc/hash.h"
#include "dbinc/heap.h"
#include "dbinc/lock.h"
@@ -83,6 +84,9 @@ __dbc_close(dbc)
DB *dbp;
DBC *opd;
DBC_INTERNAL *cp;
+#ifdef DIAGNOSTIC
+ DB_THREAD_INFO *ip;
+#endif
DB_TXN *txn;
ENV *env;
int ret, t_ret;
@@ -149,6 +153,14 @@ __dbc_close(dbc)
ret = t_ret;
F_CLR(dbc, DBC_FAMILY);
}
+#ifdef DIAGNOSTIC
+ if (dbc->locker != NULL) {
+ ENV_GET_THREAD_INFO(env, ip);
+ if (ip != NULL)
+ ip->dbth_locker = dbc->locker->prev_locker;
+ dbc->locker->prev_locker = INVALID_ROFF;
+ }
+#endif
if ((txn = dbc->txn) != NULL)
txn->cursors--;
@@ -510,6 +522,305 @@ __dbc_idel(dbc, flags)
return (ret);
}
+/*
+ * __dbc_db_stream --
+ * DBC->db_stream: check the DB_STREAM_* flags, add the matching
+ * DB_FOP_* flags and hand off to __db_stream_init.
+ *
+ * PUBLIC: int __dbc_db_stream __P((DBC *, DB_STREAM **, u_int32_t));
+ */
+int
+__dbc_db_stream(dbc, dbsp, flags)
+ DBC *dbc;
+ DB_STREAM **dbsp;
+ u_int32_t flags;
+{
+ ENV *env;
+ int ret;
+ u_int32_t oflags;
+
+ env = dbc->env;
+ oflags = flags; /* copy of the DB_STREAM_* bits; flags gains DB_FOP_* below */
+
+ if ((ret = __db_fchk(
+ env, "DBC->db_stream", flags,
+ DB_STREAM_READ | DB_STREAM_WRITE | DB_STREAM_SYNC_WRITE)) != 0)
+ return (ret);
+
+ if (DB_IS_READONLY(dbc->dbp)) { /* read-only handle: force read mode */
+ LF_SET(DB_STREAM_READ);
+ oflags |= DB_STREAM_READ;
+ }
+ if (LF_ISSET(DB_STREAM_READ) && LF_ISSET(DB_STREAM_WRITE)) { /* also hit by WRITE on a read-only handle */
+ ret = EINVAL;
+ __db_errx(env, DB_STR("0750",
+ "Error, cannot set both DB_STREAM_WRITE and DB_STREAM_READ."));
+ goto err;
+ }
+
+ if (oflags & DB_STREAM_READ)
+ LF_SET(DB_FOP_READONLY);
+ else /* neither flag given: treated as a write stream */
+ LF_SET(DB_FOP_WRITE);
+ if (oflags & DB_STREAM_SYNC_WRITE)
+ LF_SET(DB_FOP_SYNC_WRITE);
+
+ ret = __db_stream_init(dbc, dbsp, flags);
+
+err: return (ret);
+}
+
+/*
+ * __dbc_get_blob_id --
+ * Return in *blob_id the blob id stored in the data record to which the
+ * cursor currently points. Only btree, hash and heap cursors are handled;
+ * returns EINVAL for other types or if the record is not a blob record.
+ *
+ * PUBLIC: int __dbc_get_blob_id __P((DBC *, db_seq_t *));
+ */
+int
+__dbc_get_blob_id(dbc, blob_id)
+ DBC *dbc;
+ db_seq_t *blob_id;
+{
+ DBT key, data;
+ BBLOB bl;
+ HBLOB hbl;
+ HEAPBLOBHDR bhdr;
+ int ret;
+
+ if (dbc->dbtype != DB_BTREE &&
+ dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+ return (EINVAL);
+ }
+
+ ret = 0;
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ /* Get the blob database record instead of the blob. */
+ data.flags |= DB_DBT_BLOB_REC;
+
+ /*
+ * It would be great if there was a more efficient way to do this, but
+ * the complexities of getting a page from a database, especially
+ * when taking into account things like partitions and compression,
+ * make that more trouble than it is worth.
+ */
+ if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+ goto err;
+
+ switch (dbc->dbtype) {
+ case DB_BTREE:
+ if (data.size != BBLOB_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&bl, data.data, BBLOB_SIZE); /* work on a local copy of the record */
+ if (B_TYPE(bl.type) != B_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ *blob_id = (db_seq_t)bl.id;
+ break;
+ case DB_HEAP:
+ if (data.size != HEAPBLOBREC_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE); /* work on a local copy of the record */
+ if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) {
+ ret = EINVAL;
+ goto err;
+ }
+ *blob_id = (db_seq_t)bhdr.id;
+ break;
+ case DB_HASH:
+ if (data.size != HBLOB_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&hbl, data.data, HBLOB_SIZE); /* work on a local copy of the record */
+ if (HPAGE_PTYPE(&hbl) != H_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ *blob_id = (db_seq_t)hbl.id;
+ break;
+ default: /* not reached: other types returned EINVAL above */
+ ret = EINVAL;
+ goto err;
+ }
+
+err: return (ret);
+}
+
+/*
+ * __dbc_get_blob_size --
+ * Return in *size the blob file size stored in the data record to which
+ * the cursor currently points. Only btree, hash and heap cursors are
+ * handled; returns EINVAL for other types or if the record is not a blob
+ * record.
+ *
+ * PUBLIC: int __dbc_get_blob_size __P((DBC *, off_t *));
+ */
+int
+__dbc_get_blob_size(dbc, size)
+ DBC *dbc;
+ off_t *size;
+{
+ DBT key, data;
+ ENV *env;
+ BBLOB bl;
+ HBLOB hbl;
+ HEAPBLOBHDR bhdr;
+ int ret;
+
+ if (dbc->dbtype != DB_BTREE &&
+ dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+ return (EINVAL);
+ }
+
+ env = dbc->env;
+ ret = 0;
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ /* Get the blob database record instead of the blob. */
+ data.flags |= DB_DBT_BLOB_REC;
+
+ /*
+ * It would be great if there was a more efficient way to do this, but
+ * the complexities of getting a page from a database, especially
+ * when taking into account things like partitions and compression,
+ * make that more trouble than it is worth.
+ */
+ if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+ goto err;
+
+ switch (dbc->dbtype) {
+ case DB_BTREE:
+ if (data.size != BBLOB_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&bl, data.data, BBLOB_SIZE); /* work on a local copy of the record */
+ if (B_TYPE(bl.type) != B_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ GET_BLOB_SIZE(env, bl, *size, ret); /* NOTE(review): macro is passed ret, presumably to report failure -- confirm */
+ break;
+ case DB_HEAP:
+ if (data.size != HEAPBLOBREC_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE); /* work on a local copy of the record */
+ if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) {
+ ret = EINVAL;
+ goto err;
+ }
+ GET_BLOB_SIZE(env, bhdr, *size, ret);
+ break;
+ case DB_HASH:
+ if (data.size != HBLOB_SIZE) { /* any other length is rejected */
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(&hbl, data.data, HBLOB_SIZE); /* work on a local copy of the record */
+ if (HPAGE_PTYPE(&hbl) != H_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ GET_BLOB_SIZE(env, hbl, *size, ret);
+ break;
+ default: /* not reached: other types returned EINVAL above */
+ ret = EINVAL;
+ goto err;
+ }
+
+err: return (ret);
+}
+
+/*
+ * __dbc_set_blob_size --
+ * Store a new blob file size in the blob record under the cursor and write
+ * the record back with DB_CURRENT. Btree, hash and heap cursors only.
+ * Returns EINVAL if the cursor does not point to a blob record; the record
+ * length is validated before any field of the record is examined.
+ *
+ * PUBLIC: int __dbc_set_blob_size __P((DBC *, off_t));
+ */
+int
+__dbc_set_blob_size(dbc, size)
+ DBC *dbc;
+ off_t size;
+{
+ DBT key, data;
+ BBLOB *bl;
+ HBLOB *hbl;
+ HEAPBLOBHDR *bhdr;
+ int ret;
+
+ if (dbc->dbtype != DB_BTREE &&
+ dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) {
+ return (EINVAL);
+ }
+
+ ret = 0;
+ memset(&key, 0, sizeof(DBT));
+ memset(&data, 0, sizeof(DBT));
+ /* Get the blob database record instead of the blob. */
+ data.flags |= DB_DBT_BLOB_REC;
+
+ /*
+ * It would be great if there was a more efficient way to do this, but
+ * the complexities of getting a page from a database, especially
+ * when taking into account things like partitions and compression,
+ * make that more trouble than it is worth.
+ */
+ if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0)
+ goto err;
+
+ switch (dbc->dbtype) {
+ case DB_BTREE:
+ bl = (BBLOB *)data.data; /* record is updated in place, then put back */
+ if (bl == NULL ||
+ data.size != BBLOB_SIZE || B_TYPE(bl->type) != B_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ SET_BLOB_SIZE(bl, size, BBLOB);
+ break;
+ case DB_HEAP:
+ bhdr = (HEAPBLOBHDR *)data.data;
+ if (bhdr == NULL ||
+ data.size != HEAPBLOBREC_SIZE ||
+ !F_ISSET(&bhdr->std_hdr, HEAP_RECBLOB)) {
+ ret = EINVAL;
+ goto err;
+ }
+ SET_BLOB_SIZE(bhdr, size, HEAPBLOBHDR);
+ break;
+ case DB_HASH:
+ hbl = data.data;
+ if (hbl == NULL ||
+ data.size != HBLOB_SIZE || HPAGE_PTYPE(hbl) != H_BLOB) {
+ ret = EINVAL;
+ goto err;
+ }
+ SET_BLOB_SIZE((HBLOB *)hbl, size, HBLOB);
+ break;
+ default: /* not reached: other types returned EINVAL above */
+ ret = EINVAL;
+ goto err;
+ }
+
+ if ((ret = __dbc_put(dbc, &key, &data, DB_CURRENT)) != 0)
+ goto err;
+
+err: return (ret);
+}
+
#ifdef HAVE_COMPRESSION
/*
* __dbc_bulk_del --
@@ -632,6 +943,12 @@ __dbc_idup(dbc_orig, dbcp, flags)
int_n->stream_off = int_orig->stream_off;
int_n->stream_curr_pgno = int_orig->stream_curr_pgno;
+#ifdef HAVE_PARTITION
+ if (DB_IS_PARTITIONED(dbp)) {
+ if ((ret = __partc_dup(dbc_orig, dbc_n)) != 0)
+ goto err;
+ } else
+#endif
switch (dbc_orig->dbtype) {
case DB_QUEUE:
if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0)
@@ -859,7 +1176,11 @@ __dbc_iget(dbc, key, data, flags)
* we acquire a write lock in the primary tree and no locks in the
* off-page dup tree. If the DB_RMW flag was specified and the get
* operation is done in an off-page duplicate tree, call the primary
- * cursor's upgrade routine first.
+ * cursor's upgrade routine first. We fetch the primary tree's data
+ * page to follow the buffer latching order rules for btrees: latch from
+ * the top of the main tree down, even when also searching OPD trees.
+ * Deadlocks could otherwise occur if we need to fetch the main page
+ * while an OPD page is latched. [#22532]
*/
cp = dbc->internal;
if (cp->opd != NULL &&
@@ -868,6 +1189,10 @@ __dbc_iget(dbc, key, data, flags)
flags == DB_PREV || flags == DB_PREV_DUP)) {
if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0)
goto err;
+ if (cp->page == NULL && (ret = __memp_fget(mpf, &cp->pgno,
+ dbc->thread_info, dbc->txn, 0, &cp->page)) != 0)
+ goto err;
+
if (F_ISSET(dbc, DBC_TRANSIENT))
opd = cp->opd;
else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0)
@@ -1660,7 +1985,7 @@ __dbc_put_secondaries(dbc,
tskeyp, &oldpkey, rmw | DB_SET);
if (ret == 0) {
cmp = __bam_defcmp(sdbp,
- &oldpkey, pkey);
+ &oldpkey, pkey, NULL);
__os_ufree(env, oldpkey.data);
/*
* If the secondary key is unchanged,
@@ -1868,7 +2193,7 @@ __dbc_put_primary(dbc, key, data, flags)
olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
ret = __dbc_get(dbc, key, &olddata, DB_SET);
if (ret == 0) {
- ret = DB_KEYEXIST;
+ ret = DBC_ERR(dbc, DB_KEYEXIST);
goto done;
} else if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
goto err;
@@ -2100,7 +2425,7 @@ __dbc_iput(dbc, key, data, flags)
if (dbc->dbtype == DB_HASH && F_ISSET(
((BTREE_CURSOR *)(dbc->internal->opd->internal)),
C_DELETED)) {
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
goto err;
}
@@ -2228,7 +2553,7 @@ __dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata)
*/
for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++)
if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
- toldskeyp, tskeyp) == 0) {
+ toldskeyp, tskeyp, NULL) == 0) {
nsame++;
F_CLR(tskeyp, DB_DBT_ISSET);
break;
@@ -2382,12 +2707,14 @@ __dbc_cleanup(dbc, dbc_n, failed)
* cursors.
*/
if (!failed && ret == 0) {
+ MUTEX_LOCK(dbp->env, dbp->mutex);
if (opd != NULL)
opd->internal->pdbc = dbc;
if (internal->opd != NULL)
internal->opd->internal->pdbc = dbc_n;
dbc->internal = dbc_n->internal;
dbc_n->internal = internal;
+ MUTEX_UNLOCK(dbp->env, dbp->mutex);
}
/*
@@ -3501,6 +3828,32 @@ __db_check_skeyset(sdbp, skeyp)
for (key2 = key1 + 1; key2 < last_key; key2++)
DB_ASSERT(env,
((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
- key1, key2) != 0);
+ key1, key2, NULL) != 0);
+}
+#endif
+
+#ifdef HAVE_ERROR_HISTORY
+/*
+ * __dbc_diags --
+ * Record the context of an error's "first notice" plus the cursor's file
+ * and sub-database names. Does nothing when err == 0; returns err.
+ *
+ * PUBLIC: int __dbc_diags __P((DBC *, int));
+ */
+int
+__dbc_diags(dbc, err)
+ DBC *dbc;
+ int err;
+{
+ DB_MSGBUF *mb;
+
+ if (err != 0 && dbc->env != NULL &&
+ (mb = __db_deferred_get()) != NULL) {
+ (void)__db_remember_context(dbc->env, mb, err);
+ __db_msgadd(dbc->env, mb, "DB: %s:%s\n",
+ dbc->dbp->fname == NULL ? "in-mem" : dbc->dbp->fname,
+ dbc->dbp->dname == NULL ? "" : dbc->dbp->dname); /* dname, not fname: fname may be NULL here */
+ }
+ return (err);
}
#endif
diff --git a/src/db/db_cds.c b/src/db/db_cds.c
index 185d5487..d3cc990a 100644
--- a/src/db/db_cds.c
+++ b/src/db/db_cds.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -43,7 +43,15 @@ static int
__cdsgroup_abort(txn)
DB_TXN *txn;
{
- return (__cdsgroup_notsup(txn->mgrp->env, "abort"));
+ ENV *env;
+
+ env = txn->mgrp->env;
+ /*
+ * As the txn handle can not be used any more, we call
+ * __cdsgroup_commit to release the lock and destroy the handle.
+ */
+ (void)__cdsgroup_commit(txn, 0);
+ return (__cdsgroup_notsup(env, "abort"));
}
static int
@@ -83,8 +91,16 @@ static int __cdsgroup_discard(txn, flags)
DB_TXN *txn;
u_int32_t flags;
{
+ ENV *env;
+
COMPQUIET(flags, 0);
- return (__cdsgroup_notsup(txn->mgrp->env, "discard"));
+ env = txn->mgrp->env;
+ /*
+ * As the txn handle can not be used any more, we call
+ * __cdsgroup_commit to release the lock and destroy the handle.
+ */
+ (void)__cdsgroup_commit(txn, 0);
+ return (__cdsgroup_notsup(env, "discard"));
}
static u_int32_t __cdsgroup_id(txn)
diff --git a/src/db/db_compact.c b/src/db/db_compact.c
index d0f4801e..afe5a997 100644
--- a/src/db/db_compact.c
+++ b/src/db/db_compact.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -262,9 +262,11 @@ err: if (txn_local && txn != NULL) {
done: if (LF_ISSET(DB_FREE_SPACE)) {
DBMETA *meta;
db_pgno_t pgno;
+ int pgs_done;
pgno = PGNO_BASE_MD;
isdone = 1;
+ pgs_done = 0;
if (ret == 0 && !LF_ISSET(DB_FREELIST_ONLY) &&
__memp_fget(dbp->mpf, &pgno, ip, txn, 0, &meta) == 0) {
isdone = meta->free == PGNO_INVALID;
@@ -281,7 +283,8 @@ done: if (LF_ISSET(DB_FREE_SPACE)) {
} else
#endif
if (!isdone)
- ret = __bam_truncate_ipages(dbp, ip, txn_orig, c_data);
+ ret = __bam_truncate_ipages(dbp,
+ ip, txn_orig, c_data, &pgs_done);
/* Clean up the free list. */
if (list != NULL)
@@ -387,17 +390,26 @@ err: if (dbc != NULL && (t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
#endif
/*
- * __db_exchange_page -- swap a page with a lower numbered page.
- * The routine will optionally free the higher numbered page. The cursor
- * has a stack which includes at least the immediate parent of this page.
- * PUBLIC: int __db_exchange_page __P((DBC *, PAGE **, PAGE *, db_pgno_t, int));
+ * __db_exchange_page -- try to move a page 'down', to earlier in the file.
+ *
+ * This tries to move a page to a lower location in the file, by swapping it
+ * with an earlier free page. The free page comes either from the free list or
+ * the newpgno parameter (e.g., __ham_compact_hash()). If the new page turns
+ * out to be higher than the original one, the allocation is undone and
+ * the caller is left unchanged. After a successful swap, this routine can
+ * optionally free the old, higher numbered page.
+ * The cursor's stack includes at least the immediate parent of this page.
+ *
+ * PUBLIC: int __db_exchange_page
+ * PUBLIC: __P((DBC *, PAGE **, PAGE *, db_pgno_t, int, int *));
*/
int
-__db_exchange_page(dbc, pgp, opg, newpgno, flags)
+__db_exchange_page(dbc, pgp, opg, newpgno, flags, pgs_donep)
DBC *dbc;
PAGE **pgp, *opg;
db_pgno_t newpgno;
int flags;
+ int *pgs_donep;
{
BTREE_CURSOR *cp;
DB *dbp;
@@ -445,7 +457,9 @@ __db_exchange_page(dbc, pgp, opg, newpgno, flags)
* are allocating at the same time, if so, just put it back.
*/
if (PGNO(newpage) > PGNO(*pgp)) {
- /* Its unfortunate but you can't just free a new overflow. */
+ /* It is unfortunate but you can't just free a new overflow. */
+ /* XXX Is the above comment still true? */
+ /* XXX Should __db_new(OVERFLOW) zero OV_LEN()? */
if (TYPE(newpage) == P_OVERFLOW)
OV_LEN(newpage) = 0;
if ((ret = __LPUT(dbc, lock)) != 0)
@@ -572,7 +586,9 @@ __db_exchange_page(dbc, pgp, opg, newpgno, flags)
if ((ret = __TLPUT(dbc, lock)) != 0)
return (ret);
-done: return (0);
+done:
+ (*pgs_donep)++;
+ return (0);
err: (void)__memp_fput(dbp->mpf, dbc->thread_info, newpage, dbc->priority);
(void)__TLPUT(dbc, lock);
@@ -584,15 +600,16 @@ err: (void)__memp_fput(dbp->mpf, dbc->thread_info, newpage, dbc->priority);
* Walk the pages of an overflow chain and swap out
* high numbered pages. We are passed the first page
* but only deal with the second and subsequent pages.
- * PUBLIC: int __db_truncate_overflow __P((DBC *,
- * PUBLIC: db_pgno_t, PAGE **, DB_COMPACT *));
+ * PUBLIC: int __db_truncate_overflow __P((DBC *, db_pgno_t,
+ * PUBLIC: PAGE **, DB_COMPACT *, int *));
*/
int
-__db_truncate_overflow(dbc, pgno, ppg, c_data)
+__db_truncate_overflow(dbc, pgno, ppg, c_data, pgs_donep)
DBC *dbc;
db_pgno_t pgno;
PAGE **ppg;
DB_COMPACT *c_data;
+ int *pgs_donep;
{
DB *dbp;
DB_LOCK lock;
@@ -618,7 +635,7 @@ __db_truncate_overflow(dbc, pgno, ppg, c_data)
return (ret);
if (pgno <= c_data->compact_truncate)
continue;
- if (have_lock == 0) {
+ if (!have_lock) {
DB_ASSERT(dbp->env, ppg != NULL);
ppgno = PGNO(*ppg);
if ((ret = __memp_fput(dbp->mpf, dbc->thread_info,
@@ -635,30 +652,32 @@ __db_truncate_overflow(dbc, pgno, ppg, c_data)
have_lock = 1;
}
if ((ret = __db_exchange_page(dbc,
- &page, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+ &page, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
break;
}
err: if (page != NULL &&
- (t_ret = __memp_fput( dbp->mpf,
+ (t_ret = __memp_fput(dbp->mpf,
dbc->thread_info, page, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
ret = t_ret;
return (ret);
}
+
/*
* __db_truncate_root -- swap a root page for a lower numbered page.
* PUBLIC: int __db_truncate_root __P((DBC *,
- * PUBLIC: PAGE *, u_int32_t, db_pgno_t *, u_int32_t));
+ * PUBLIC: PAGE *, u_int32_t, db_pgno_t *, u_int32_t, int *));
*/
int
-__db_truncate_root(dbc, ppg, indx, pgnop, tlen)
+__db_truncate_root(dbc, ppg, indx, pgnop, tlen, pgs_donep)
DBC *dbc;
PAGE *ppg;
u_int32_t indx;
db_pgno_t *pgnop;
u_int32_t tlen;
+ int *pgs_donep;
{
DB *dbp;
DBT orig;
@@ -693,7 +712,7 @@ __db_truncate_root(dbc, ppg, indx, pgnop, tlen)
} else {
LOCK_CHECK_OFF(dbc->thread_info);
ret = __db_exchange_page(dbc,
- &page, NULL, PGNO_INVALID, DB_EXCH_FREE);
+ &page, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep);
LOCK_CHECK_ON(dbc->thread_info);
if (ret != 0)
goto err;
@@ -705,8 +724,7 @@ __db_truncate_root(dbc, ppg, indx, pgnop, tlen)
/* Update the reference. */
if (DBC_LOGGING(dbc)) {
- if ((ret = __db_pgno_log(dbp,
- dbc->txn, &LSN(ppg), 0, PGNO(ppg),
+ if ((ret = __db_pgno_log(dbp, dbc->txn, &LSN(ppg), 0, PGNO(ppg),
&LSN(ppg), (u_int32_t)indx, *pgnop, newpgno)) != 0)
goto err;
} else
@@ -780,13 +798,13 @@ __db_find_free(dbc, type, size, bstart, freep)
goto err;
if (nelems == 0) {
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
goto err;
}
for (i = 0; i < nelems; i++) {
if (list[i] > bstart) {
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
goto err;
}
start = i;
@@ -812,7 +830,7 @@ __db_find_free(dbc, type, size, bstart, freep)
goto found;
}
}
- ret = DB_NOTFOUND;
+ ret = DBC_ERR(dbc, DB_NOTFOUND);
goto err;
found: /* We have size range of pages. Remove them. */
@@ -1005,13 +1023,15 @@ err: if (np != NULL && np != otherp)
* __db_move_metadata -- move a meta data page to a lower page number.
* The meta data page must be exclusively latched on entry.
*
- * PUBLIC: int __db_move_metadata __P((DBC *, DBMETA **, DB_COMPACT *));
+ * PUBLIC: int __db_move_metadata
+ * PUBLIC: __P((DBC *, DBMETA **, DB_COMPACT *, int *));
*/
int
-__db_move_metadata(dbc, metap, c_data)
+__db_move_metadata(dbc, metap, c_data, pgs_donep)
DBC *dbc;
DBMETA **metap;
DB_COMPACT *c_data;
+ int *pgs_donep;
{
BTREE *bt;
DB *dbp, *mdbp;
@@ -1023,7 +1043,7 @@ __db_move_metadata(dbc, metap, c_data)
c_data->compact_pages_examine++;
if ((ret = __db_exchange_page(dbc,
- (PAGE**)metap, NULL, PGNO_INVALID, DB_EXCH_FREE)) != 0)
+ (PAGE **)metap, NULL, PGNO_INVALID, DB_EXCH_FREE, pgs_donep)) != 0)
return (ret);
if (PGNO(*metap) == dbp->meta_pgno)
diff --git a/src/db/db_conv.c b/src/db/db_conv.c
index 210b4d6e..77c6b760 100644
--- a/src/db/db_conv.c
+++ b/src/db/db_conv.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -487,8 +487,12 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
{
ENV *env;
BINTERNAL *bi;
+ BBLOB *bl;
BKEYDATA *bk;
BOVERFLOW *bo;
+ HEAPBLOBHDR *bhdr;
+ HEAPHDR *hh;
+ HEAPSPLITHDR *hsh;
RINTERNAL *ri;
db_indx_t i, *inp, len, tmp;
u_int8_t *end, *p, *pgend;
@@ -500,8 +504,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
M_32_SWAP(h->lsn.file);
M_32_SWAP(h->lsn.offset);
M_32_SWAP(h->pgno);
- M_32_SWAP(h->prev_pgno);
- M_32_SWAP(h->next_pgno);
+ if (TYPE(h) == P_HEAP) {
+ M_32_SWAP(((HEAPPG *)h)->high_pgno);
+ M_16_SWAP(((HEAPPG *)h)->high_indx);
+ M_16_SWAP(((HEAPPG *)h)->free_indx);
+ } else {
+ M_32_SWAP(h->prev_pgno);
+ M_32_SWAP(h->next_pgno);
+ }
M_16_SWAP(h->entries);
M_16_SWAP(h->hf_offset);
}
@@ -527,6 +537,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
continue;
switch (HPAGE_TYPE(dbp, h, i)) {
+ case H_BLOB:
+ p = HBLOB_ID(P_ENTRY(dbp, h, i));
+ SWAP64(p); /* id */
+ SWAP64(p); /* size */
+ p = HBLOB_FILE_ID(P_ENTRY(dbp, h, i));
+ SWAP64(p); /* file id */
+ SWAP64(p); /* sdb id */
+ break;
case H_KEYDATA:
break;
case H_DUPLICATE:
@@ -599,6 +617,14 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
if ((u_int8_t *)bk >= pgend)
continue;
switch (B_TYPE(bk->type)) {
+ case B_BLOB:
+ bl = (BBLOB *)bk;
+ M_16_SWAP(bl->len);
+ M_64_SWAP(bl->id); /* id */
+ M_64_SWAP(bl->size); /* size */
+ M_64_SWAP(bl->file_id); /* file id */
+ M_64_SWAP(bl->sdb_id); /* sdb id */
+ break;
case B_KEYDATA:
M_16_SWAP(bk->len);
break;
@@ -663,6 +689,32 @@ __db_byteswap(dbp, pg, h, pagesize, pgin)
}
break;
case P_HEAP:
+ for (i = 0; i <= HEAP_HIGHINDX(h); i++) {
+ if (pgin)
+ M_16_SWAP(inp[i]);
+ if (inp[i] == 0)
+ continue;
+
+ hh = (HEAPHDR *)P_ENTRY(dbp, h, i);
+ if ((u_int8_t *)hh >= pgend)
+ continue;
+ M_16_SWAP(hh->size);
+ if (F_ISSET(hh, HEAP_RECSPLIT)) {
+ hsh = (HEAPSPLITHDR *)hh;
+ M_32_SWAP(hsh->tsize);
+ M_32_SWAP(hsh->nextpg);
+ M_16_SWAP(hsh->nextindx);
+ } else if (F_ISSET(hh, HEAP_RECBLOB)) {
+ bhdr = (HEAPBLOBHDR *)hh;
+ M_64_SWAP(bhdr->id); /* id */
+ M_64_SWAP(bhdr->size); /* size */
+ M_64_SWAP(bhdr->file_id); /* file id */
+ }
+
+ if (!pgin)
+ M_16_SWAP(inp[i]);
+ }
+ break;
case P_IHEAP:
case P_INVALID:
case P_OVERFLOW:
@@ -678,8 +730,14 @@ out: if (!pgin) {
M_32_SWAP(h->lsn.file);
M_32_SWAP(h->lsn.offset);
M_32_SWAP(h->pgno);
- M_32_SWAP(h->prev_pgno);
- M_32_SWAP(h->next_pgno);
+ if (TYPE(h) == P_HEAP) {
+ M_32_SWAP(((HEAPPG *)h)->high_pgno);
+ M_16_SWAP(((HEAPPG *)h)->high_indx);
+ M_16_SWAP(((HEAPPG *)h)->free_indx);
+ } else {
+ M_32_SWAP(h->prev_pgno);
+ M_32_SWAP(h->next_pgno);
+ }
M_16_SWAP(h->entries);
M_16_SWAP(h->hf_offset);
}
@@ -718,7 +776,10 @@ __db_pageswap(env, dbp, pp, len, pdata, pgin)
case P_HASHMETA:
return (__ham_mswap(env, pp));
-
+#ifdef HAVE_HEAP
+ case P_HEAPMETA:
+ return (__heap_mswap(env, pp));
+#endif
case P_QAMMETA:
return (__qam_mswap(env, pp));
@@ -794,12 +855,17 @@ __db_recordswap(op, size, hdr, data, pgin)
void *hdr, *data;
u_int32_t pgin;
{
+ BBLOB *bl;
BKEYDATA *bk;
BOVERFLOW *bo;
BINTERNAL *bi;
+ DBT *dbt;
+ HEAPHDR *hh;
+ HEAPBLOBHDR bhdr;
+ HEAPSPLITHDR *hsh;
RINTERNAL *ri;
db_indx_t tmp;
- u_int8_t *p, *end;
+ u_int8_t buf[HEAPBLOBREC_SIZE], *end, *p;
if (size == 0)
return;
@@ -812,6 +878,14 @@ __db_recordswap(op, size, hdr, data, pgin)
case B_KEYDATA:
M_16_SWAP(bk->len);
break;
+ case B_BLOB:
+ bl = (BBLOB *)bk;
+ M_16_SWAP(bl->len);
+ M_64_SWAP(bl->id); /* id */
+ M_64_SWAP(bl->size); /* size */
+ M_64_SWAP(bl->file_id); /* file id */
+ M_64_SWAP(bl->sdb_id); /* sdb id */
+ break;
case B_DUPLICATE:
case B_OVERFLOW:
bo = (BOVERFLOW *)hdr;
@@ -835,6 +909,7 @@ __db_recordswap(op, size, hdr, data, pgin)
} else
bo = (BOVERFLOW *)data;
M_32_SWAP(bo->pgno);
+ M_32_SWAP(bo->tlen);
}
break;
case P_IRECNO:
@@ -867,10 +942,10 @@ __db_recordswap(op, size, hdr, data, pgin)
SWAP16(p);
}
break;
- /* These two record types include the full header. */
+ /* These three record types include the full header. */
case H_OFFDUP:
p = (u_int8_t *)hdr;
- p += SSZ(HOFFPAGE, pgno);
+ p += SSZ(HOFFDUP, pgno);
SWAP32(p); /* pgno */
break;
case H_OFFPAGE:
@@ -879,11 +954,61 @@ __db_recordswap(op, size, hdr, data, pgin)
SWAP32(p); /* pgno */
SWAP32(p); /* tlen */
break;
+ case H_BLOB:
+ p = HBLOB_ID(hdr);
+ SWAP64(p); /* id */
+ SWAP64(p); /* size */
+ p = HBLOB_FILE_ID(hdr);
+ SWAP64(p); /* file id */
+ SWAP64(p); /* sdb id */
+ break;
default:
DB_ASSERT(NULL, op != op);
}
break;
-
+ case P_HEAP:
+ hh = (HEAPHDR *)hdr;
+ M_16_SWAP(hh->size);
+ if (F_ISSET(hh, HEAP_RECSPLIT)) {
+ hsh = (HEAPSPLITHDR *)hdr;
+ M_32_SWAP(hsh->tsize);
+ M_32_SWAP(hsh->nextpg);
+ M_16_SWAP(hsh->nextindx);
+ }else if (F_ISSET(hh, HEAP_RECBLOB)) {
+ /*
+ * Heap blob records are broken into two parts when
+ * logged, the shared header and the part that is
+ * unique to blob records, which is stored in the
+ * log data field.
+ */
+ if (data != NULL) {
+ dbt = NULL;
+ if (pgin) {
+ dbt = data;
+ memcpy(buf + sizeof(HEAPHDR),
+ dbt->data, HEAPBLOBREC_DSIZE);
+ } else {
+ memcpy(buf + sizeof(HEAPHDR),
+ data, HEAPBLOBREC_DSIZE);
+ }
+ memcpy(&bhdr, buf, HEAPBLOBREC_SIZE);
+ M_64_SWAP(bhdr.id); /* id */
+ M_64_SWAP(bhdr.size); /* size */
+ M_64_SWAP(bhdr.file_id); /* file id */
+ memcpy(buf, &bhdr, HEAPBLOBREC_SIZE);
+ if (pgin) {
+ memcpy(dbt->data,
+ HEAPBLOBREC_DATA(buf),
+ HEAPBLOBREC_DSIZE);
+ } else {
+ memcpy(data,
+ HEAPBLOBREC_DATA(buf),
+ HEAPBLOBREC_DSIZE);
+ }
+ }
+ break;
+ }
+ break;
default:
DB_ASSERT(NULL, op != op);
}
diff --git a/src/db/db_copy.c b/src/db/db_copy.c
index 359c74be..d9786702 100644
--- a/src/db/db_copy.c
+++ b/src/db/db_copy.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_dispatch.c b/src/db/db_dispatch.c
index 06de4ef7..7cb7f9ca 100644
--- a/src/db/db_dispatch.c
+++ b/src/db/db_dispatch.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
@@ -639,7 +639,7 @@ __db_txnlist_find(env, hp, txnid, statusp)
DB_TXNLIST *entry;
if (txnid == 0)
- return (DB_NOTFOUND);
+ return (USR_ERR(env, DB_NOTFOUND));
return (__db_txnlist_find_internal(env, hp,
TXNLIST_TXNID, txnid, &entry, 0, statusp));
@@ -666,7 +666,7 @@ __db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok)
int ret;
if (txnid == 0)
- return (DB_NOTFOUND);
+ return (USR_ERR(env, DB_NOTFOUND));
ret = __db_txnlist_find_internal(env,
hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status);
@@ -715,7 +715,7 @@ __db_txnlist_find_internal(env,
ret = 0;
if (hp == NULL)
- return (DB_NOTFOUND);
+ return (USR_ERR(env, DB_NOTFOUND));
switch (type) {
case TXNLIST_TXNID:
@@ -759,7 +759,7 @@ __db_txnlist_find_internal(env,
return (ret);
}
- return (DB_NOTFOUND);
+ return (USR_ERR(env, DB_NOTFOUND));
}
/*
diff --git a/src/db/db_dup.c b/src/db/db_dup.c
index 9fd04791..e66ec92b 100644
--- a/src/db/db_dup.c
+++ b/src/db/db_dup.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_iface.c b/src/db/db_iface.c
index 59e0ba53..da6140a4 100644
--- a/src/db/db_iface.c
+++ b/src/db/db_iface.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -159,9 +159,15 @@ __db_associate_arg(dbp, sdbp, callback, flags)
env = dbp->env;
+ if (dbp->blob_threshold || sdbp->blob_threshold) {
+ __db_errx(env, DB_STR("0751",
+ "Secondary and primary databases cannot support blobs."));
+ return (EINVAL);
+ }
+
if (sdbp->type == DB_HEAP) {
- __db_errx(env,
- "Heap databases may not be used as secondary databases");
+ __db_errx(env, DB_STR("0752",
+ "Heap databases may not be used as secondary databases"));
return (EINVAL);
}
@@ -288,6 +294,7 @@ __db_cursor_pp(dbp, txn, dbcp, flags)
int rep_blocked, ret;
env = dbp->env;
+ (*dbcp) = NULL;
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
@@ -331,7 +338,8 @@ __db_cursor_pp(dbp, txn, dbcp, flags)
* If a family transaction was passed in, the transaction handle in
* the cursor may not match.
*/
- txn = (*dbcp)->txn;
+ if ((*dbcp) != NULL)
+ txn = (*dbcp)->txn;
if (txn != NULL && ret == 0)
TAILQ_INSERT_HEAD(&(txn->my_cursors), *dbcp, txn_cursors);
@@ -434,6 +442,13 @@ __db_cursor_arg(dbp, flags)
return (__db_fnl(env, "DB->cursor"));
}
+ if (dbp->blob_threshold &&
+ LF_ISSET(DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT)) {
+ __db_errx(dbp->env, DB_STR("0753",
+"Blob enabled databases do not support READ_UNCOMMITTED and TXN_SNAPSHOT."));
+ return (EINVAL);
+ }
+
LF_CLR(DB_CURSOR_BULK |
DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);
@@ -828,6 +843,12 @@ __db_get_arg(dbp, key, data, flags)
env = dbp->env;
+ if (dbp->blob_threshold && LF_ISSET(DB_READ_UNCOMMITTED)) {
+ __db_errx(env, DB_STR("0754",
+ "Blob enabled databases do not support DB_READ_UNCOMMITTED."));
+ return (EINVAL);
+ }
+
/*
* Check for read-modify-write validity. DB_RMW doesn't make sense
* with CDB cursors since if you're going to write the cursor, you
@@ -876,6 +897,9 @@ __db_get_arg(dbp, key, data, flags)
break;
case DB_CONSUME:
case DB_CONSUME_WAIT:
+ if (DB_IS_READONLY(dbp))
+ return (__db_rdonly(env,
+ "DB->get CONSUME/CONSUME_WAIT"));
if (dirty) {
__db_errx(env, DB_STR_A("0583",
"%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
@@ -1148,6 +1172,13 @@ __db_open_pp(dbp, txn, fname, dname, type, flags, mode)
/* Save the current DB handle flags for refresh. */
dbp->orig_flags = dbp->flags;
+ if (fname == 0 && PREFMAS_IS_SET(env)) {
+ __db_errx(env, DB_STR("0783", "In-memory databases are not "
+ "supported in Replication Manager preferred master mode"));
+ ret = EINVAL;
+ goto err;
+ }
+
/* Check for replication block. */
handle_check = IS_ENV_REPLICATED(env);
if (handle_check &&
@@ -1389,6 +1420,18 @@ __db_open_arg(dbp, txn, fname, dname, type, flags)
return (EINVAL);
}
+ if (LF_ISSET(DB_MULTIVERSION) && dbp->blob_threshold) {
+ __db_errx(env, DB_STR("0755",
+ "DB_MULTIVERSION illegal with blob enabled databases"));
+ return (EINVAL);
+ }
+
+ if (LF_ISSET(DB_READ_UNCOMMITTED) && dbp->blob_threshold) {
+ __db_errx(env, DB_STR("0756",
+ "DB_READ_UNCOMMITTED illegal with blob enabled databases"));
+ return (EINVAL);
+ }
+
/* DB_TRUNCATE is neither transaction recoverable nor lockable. */
if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
__db_errx(env, DB_STR_A("0599",
@@ -1901,8 +1944,6 @@ __db_compact_pp(dbp, txn, start, stop, c_data, flags, end)
ret = __db_compact_int(dbp, ip,
txn, start, stop, dp, flags, end);
break;
- case DB_HEAP:
- break;
default:
ret = __dbh_am_chk(dbp, DB_OK_BTREE);
break;
@@ -2893,7 +2934,7 @@ __dbt_ferr(dbp, name, dbt, check_thread)
* database, without having to clear flags.
*/
if ((ret = __db_fchk(env, name, dbt->flags,
- DB_DBT_APPMALLOC | DB_DBT_BULK | DB_DBT_DUPOK |
+ DB_DBT_APPMALLOC | DB_DBT_BLOB | DB_DBT_BULK | DB_DBT_DUPOK |
DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY |
DB_DBT_USERMEM | DB_DBT_PARTIAL | DB_DBT_READONLY)) != 0)
return (ret);
diff --git a/src/db/db_join.c b/src/db/db_join.c
index 751cf9e2..24d5260e 100644
--- a/src/db/db_join.c
+++ b/src/db/db_join.c
@@ -1,7 +1,7 @@
/*
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1998, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -717,7 +717,6 @@ __db_join_close(dbc)
DBC *dbc;
{
DB *dbp;
- DB_THREAD_INFO *ip;
ENV *env;
JOIN_CURSOR *jc;
int ret, t_ret;
@@ -737,7 +736,6 @@ __db_join_close(dbc)
TAILQ_REMOVE(&dbp->join_queue, dbc, links);
MUTEX_UNLOCK(env, dbp->mutex);
- ENV_ENTER(env, ip);
/*
* Close any open scratch cursors. In each case, there may
* not be as many outstanding as there are cursors in
@@ -757,7 +755,6 @@ __db_join_close(dbc)
(t_ret = __dbc_close(jc->j_fdupcurs[i])) != 0)
ret = t_ret;
}
- ENV_LEAVE(env, ip);
__os_free(env, jc->j_exhausted);
__os_free(env, jc->j_curslist);
@@ -796,7 +793,7 @@ __db_join_getnext(dbc, key, data, exhausted, opmods)
int ret, cmp;
DB *dbp;
DBT ldata;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
dbp = dbc->dbp;
func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
@@ -812,7 +809,7 @@ __db_join_getnext(dbc, key, data, exhausted, opmods)
if ((ret = __dbc_get(dbc,
key, &ldata, opmods | DB_CURRENT)) != 0)
break;
- cmp = func(dbp, data, &ldata);
+ cmp = func(dbp, data, &ldata, NULL);
if (cmp == 0) {
/*
* We have to return the real data value. Copy
diff --git a/src/db/db_meta.c b/src/db/db_meta.c
index 8f97ebd8..53cf77cc 100644
--- a/src/db/db_meta.c
+++ b/src/db/db_meta.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -939,12 +939,14 @@ done: if (last_pgnop != NULL)
*last_pgnop = meta->last_pgno;
/*
- * The truncate point is the number of pages in the free
- * list back from the last page. The number of pages
- * in the free list are the number that we can swap in.
- * Adjust it down slightly so if we find higher numbered
- * pages early and then free other pages later we can
- * truncate them.
+ * Set the truncation point, which determines which pages may be
+ * relocated. Pages above it are candidates to be swapped with a lower
+ * one from the free list by __db_exchange_page(); pages at or below the
+ * truncation point are not relocated.
+ * The truncation point starts as N pages less than last_pgno, where
+ * N is the size of the free list. This is reduced by 1/4 in the hope
+ * that partially full pages will be coalesced together, creating
+ * additional free pages during the compact.
*/
if (c_data) {
c_data->compact_truncate = (u_int32_t)meta->last_pgno - nelems;
diff --git a/src/db/db_method.c b/src/db/db_method.c
index 82d03e5f..d807bab6 100644
--- a/src/db/db_method.c
+++ b/src/db/db_method.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,6 +9,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/blob.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
@@ -36,14 +37,15 @@ static int __db_set_alloc __P((DB *, void *(*)(size_t),
static int __db_get_append_recno __P((DB *,
int (**)(DB *, DBT *, db_recno_t)));
static int __db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
+static int __db_get_blob_dir __P((DB *, const char **));
+static int __db_set_blob_dir __P((DB *, const char *));
+static int __db_get_blob_sub_dir __P((DB *, const char **));
static int __db_get_cachesize __P((DB *, u_int32_t *, u_int32_t *, int *));
static int __db_set_cachesize __P((DB *, u_int32_t, u_int32_t, int));
static int __db_get_create_dir __P((DB *, const char **));
static int __db_set_create_dir __P((DB *, const char *));
static int __db_get_dup_compare
- __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
-static int __db_set_dup_compare
- __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
+ __P((DB *, int (**)(DB *, const DBT *, const DBT *, size_t *)));
static int __db_get_encrypt_flags __P((DB *, u_int32_t *));
static int __db_set_encrypt __P((DB *, const char *, u_int32_t));
static int __db_get_feedback __P((DB *, void (**)(DB *, int, int)));
@@ -90,6 +92,12 @@ db_create(dbpp, dbenv, flags)
ip = NULL;
env = dbenv == NULL ? NULL : dbenv->env;
+#ifdef HAVE_ERROR_HISTORY
+ /* Call thread local storage initializer at least once per process. */
+ if (env == NULL)
+ __db_thread_init();
+#endif
+
/* Check for invalid function flags. */
switch (flags) {
case 0:
@@ -206,12 +214,11 @@ __db_create_internal(dbpp, env, flags)
err: if (dbp != NULL) {
if (dbp->mpf != NULL)
(void)__memp_fclose(dbp->mpf, 0);
+ if (F_ISSET(env, ENV_DBLOCAL))
+ (void)__env_close(dbp->dbenv, 0);
__os_free(env, dbp);
}
- if (dbp != NULL && F_ISSET(env, ENV_DBLOCAL))
- (void)__env_close(dbp->dbenv, 0);
-
return (ret);
}
@@ -225,6 +232,7 @@ __db_init(dbp, flags)
u_int32_t flags;
{
int ret;
+ u_int32_t bytes;
dbp->locker = NULL;
dbp->alt_close = NULL;
@@ -254,6 +262,9 @@ __db_init(dbp, flags)
dbp->get_alloc = __db_get_alloc;
dbp->get_append_recno = __db_get_append_recno;
dbp->get_assoc_flags = __db_get_assoc_flags;
+ dbp->get_blob_dir = __db_get_blob_dir;
+ dbp->get_blob_sub_dir = __db_get_blob_sub_dir;
+ dbp->get_blob_threshold = __db_get_blob_threshold;
dbp->get_byteswapped = __db_get_byteswapped;
dbp->get_cachesize = __db_get_cachesize;
dbp->get_create_dir = __db_get_create_dir;
@@ -290,6 +301,8 @@ __db_init(dbp, flags)
dbp->rename = __db_rename_pp;
dbp->set_alloc = __db_set_alloc;
dbp->set_append_recno = __db_set_append_recno;
+ dbp->set_blob_dir = __db_set_blob_dir;
+ dbp->set_blob_threshold = __db_set_blob_threshold;
dbp->set_cachesize = __db_set_cachesize;
dbp->set_create_dir = __db_set_create_dir;
dbp->set_dup_compare = __db_set_dup_compare;
@@ -316,7 +329,11 @@ __db_init(dbp, flags)
dbp->verify = __db_verify_pp;
/* DB PUBLIC HANDLE LIST END */
- /* Access method specific. */
+ if ((ret = __env_get_blob_threshold_int(dbp->env, &bytes)) != 0)
+ return (ret);
+ dbp->blob_threshold = bytes;
+
+ /* Access method specific. */
if ((ret = __bam_db_create(dbp)) != 0)
return (ret);
if ((ret = __ham_db_create(dbp)) != 0)
@@ -535,6 +552,182 @@ __db_set_append_recno(dbp, func)
}
/*
+ * __db_get_blob_threshold --
+ * Get the current threshold size at which records are stored as blobs.
+ *
+ * dbp: the database handle whose blob_threshold field is read.
+ * bytes: out parameter; receives the handle's current blob threshold.
+ *
+ * Always returns 0.
+ *
+ * PUBLIC: int __db_get_blob_threshold __P((DB *, u_int32_t *));
+ */
+int
+__db_get_blob_threshold(dbp, bytes)
+ DB *dbp;
+ u_int32_t *bytes;
+{
+ /*
+ * While shared, this value never changes after open, so it is safe
+ * to access it without mutex protection.
+ */
+ *bytes = dbp->blob_threshold;
+
+ return (0);
+}
+
+/*
+ * __db_set_blob_threshold --
+ * API to allow setting the threshold size at which records are stored
+ * as blobs rather than in database items. No flags currently supported.
+ *
+ * dbp: the database handle to configure.
+ * bytes: the new threshold; a value of 0 bypasses the compatibility
+ * checks on checksum, encryption, duplicates and compression.
+ * flags: validated by __db_fchk() against an allowed mask of 0.
+ *
+ * Returns EINVAL if the flags check fails, or if bytes is non-zero and
+ * the handle has any of DB_AM_CHKSUM, DB_AM_ENCRYPT, DB_AM_DUP or
+ * DB_AM_DUPSORT set (or, when built with HAVE_COMPRESSION, the handle
+ * is compressed). Otherwise stores bytes in dbp->blob_threshold and
+ * returns 0.
+ * NOTE(review): DB_ILLEGAL_AFTER_OPEN presumably rejects calls made
+ * after the handle has been opened; its definition is not visible
+ * here -- confirm.
+ *
+ * PUBLIC: int __db_set_blob_threshold __P((DB *, u_int32_t, u_int32_t));
+ */
+int
+__db_set_blob_threshold(dbp, bytes, flags)
+ DB *dbp;
+ u_int32_t bytes;
+ u_int32_t flags;
+{
+ if (__db_fchk(dbp->env, "DB->set_blob_threshold", flags, 0) != 0)
+ return (EINVAL);
+
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_blob_threshold");
+
+ if (bytes != 0 && F_ISSET(dbp,
+ (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_DUP | DB_AM_DUPSORT))) {
+ __db_errx(dbp->env, DB_STR("0760",
+"Cannot enable blobs in databases with checksum, encryption, or duplicates."));
+ return (EINVAL);
+ }
+#ifdef HAVE_COMPRESSION
+ if (DB_IS_COMPRESSED(dbp) && bytes != 0) {
+ __db_errx(dbp->env, DB_STR("0761",
+ "Cannot enable blobs in databases with compression."));
+ return (EINVAL);
+ }
+#endif
+
+ dbp->blob_threshold = bytes;
+
+ return (0);
+}
+
+/*
+ * __db_blobs_enabled --
+ *
+ * Used to tell if the database is configured to support blobs.
+ *
+ * Returns 1 only when dbp->blob_threshold is non-zero and none of the
+ * following disqualifying conditions holds; returns 0 otherwise:
+ * - the handle has DB_AM_CHKSUM or DB_AM_ENCRYPT set;
+ * - the handle is compressed (checked only under HAVE_COMPRESSION);
+ * - the environment's DB_ENV has DB_ENV_TXN_SNAPSHOT set;
+ * - the database type is DB_RECNO or DB_QUEUE;
+ * - the handle has DB_AM_DUP or DB_AM_DUPSORT set;
+ * - the handle has DB_AM_INMEM set;
+ * - IS_DB_FILE() is true for the handle's fname or dname.
+ * The handle itself is not modified.
+ *
+ * PUBLIC: int __db_blobs_enabled __P((DB *));
+ */
+int
+__db_blobs_enabled(dbp)
+ DB *dbp;
+{
+ /* Blob threshold must be non-0. */
+ if (!dbp->blob_threshold)
+ return (0);
+ /* Blobs cannot support encryption or checksum, but that may change. */
+ if (F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT)))
+ return (0);
+ /* Blobs do not support compression, but that may change. */
+#ifdef HAVE_COMPRESSION
+ if (DB_IS_COMPRESSED(dbp))
+ return (0);
+#endif
+ if (dbp->env->dbenv != NULL &&
+ F_ISSET(dbp->env->dbenv, DB_ENV_TXN_SNAPSHOT))
+ return (0);
+ /* Cannot support blobs in recno or queue. */
+ if (dbp->type == DB_RECNO || dbp->type == DB_QUEUE)
+ return (0);
+ /*
+ * Cannot support dups because that would require comparing
+ * blob data items.
+ */
+ if (F_ISSET(dbp, (DB_AM_DUP | DB_AM_DUPSORT)))
+ return (0);
+ /* No place to put blob files when using an in-memory db. */
+ if (F_ISSET(dbp, (DB_AM_INMEM)))
+ return (0);
+
+ /* BDB managed databases should not support blobs. */
+ if ((dbp->fname != NULL && IS_DB_FILE(dbp->fname)) ||
+ (dbp->dname != NULL && IS_DB_FILE(dbp->dname)))
+ return (0);
+
+ return (1);
+}
+
+/*
+ * __db_get_blob_sub_dir --
+ *
+ * Returns the subdirectory of the blob directory in which the blob files
+ * for the given db are stored, or NULL if there is none.
+ *
+ * dbp: the database handle to query.
+ * dir: out parameter; set to the handle's blob_sub_dir field. The pointer
+ * refers to the handle's own storage, it is not a copy.
+ *
+ * Returns 0 once *dir has been set.
+ * NOTE(review): DB_ILLEGAL_BEFORE_OPEN presumably rejects calls made
+ * before the handle has been opened; its definition is not visible
+ * here -- confirm.
+ */
+static int
+__db_get_blob_sub_dir(dbp, dir)
+ DB *dbp;
+ const char **dir;
+{
+ DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_blob_sub_dir");
+
+ *dir = dbp->blob_sub_dir;
+
+ return (0);
+}
+
+/*
+ * __db_get_blob_dir --
+ *
+ * Get the blob directory for this database.
+ *
+ * dbp: the database handle to query.
+ * dir: out parameter, chosen as follows:
+ * - NULL if the handle's environment has no DB_ENV;
+ * - the DB_ENV's db_blob_dir if that is set (the stored pointer itself,
+ * not a copy);
+ * - otherwise BLOB_DEFAULT_DIR if the environment has a db_home;
+ * - otherwise NULL.
+ *
+ * Always returns 0.
+ */
+static int
+__db_get_blob_dir(dbp, dir)
+ DB *dbp;
+ const char **dir;
+{
+ DB_ENV *dbenv;
+ ENV *env;
+
+ env = dbp->env;
+ dbenv = dbp->env->dbenv;
+ *dir = NULL;
+
+ if (dbenv == NULL)
+ return (0);
+
+ if (dbenv->db_blob_dir != NULL)
+ *dir = dbenv->db_blob_dir;
+ else if (env->db_home != NULL)
+ *dir = BLOB_DEFAULT_DIR;
+
+ return (0);
+}
+
+/*
+ * __db_set_blob_dir --
+ *
+ * Set the blob directory in a local environment.
+ *
+ * dbp: the database handle whose environment is updated.
+ * dir: the directory name; it is duplicated with __os_strdup(), so the
+ * caller keeps ownership of its own string.
+ *
+ * Returns 0 without doing anything if the environment has no DB_ENV.
+ * Otherwise any previously stored db_blob_dir is freed and cleared
+ * before the copy is made, and the result of __os_strdup() is returned.
+ * NOTE(review): dir is passed to __os_strdup() unchecked, and the old
+ * value is already gone if the copy fails -- confirm callers never
+ * pass NULL and that losing the old setting on failure is acceptable.
+ * NOTE(review): DB_ILLEGAL_IN_ENV and DB_ILLEGAL_AFTER_OPEN presumably
+ * reject handles created in a user environment or already opened;
+ * their definitions are not visible here -- confirm.
+ */
+static int
+__db_set_blob_dir(dbp, dir)
+ DB *dbp;
+ const char *dir;
+{
+ DB_ENV *dbenv;
+ ENV *env;
+
+ DB_ILLEGAL_IN_ENV(dbp, "DB->set_blob_dir");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_blob_dir");
+ env = dbp->env;
+ dbenv = dbp->env->dbenv;
+
+ if (dbenv == NULL)
+ return (0);
+
+ if (dbenv->db_blob_dir != NULL)
+ __os_free(env, dbenv->db_blob_dir);
+ dbenv->db_blob_dir = NULL;
+
+ return (__os_strdup(env, dir, &dbenv->db_blob_dir));
+}
+
+/*
* __db_get_cachesize --
* Get underlying cache size.
*/
@@ -607,7 +800,7 @@ __db_get_create_dir(dbp, dirp)
static int
__db_get_dup_compare(dbp, funcp)
DB *dbp;
- int (**funcp) __P((DB *, const DBT *, const DBT *));
+ int (**funcp) __P((DB *, const DBT *, const DBT *, size_t *));
{
DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
@@ -628,11 +821,14 @@ __db_get_dup_compare(dbp, funcp)
/*
* __db_set_dup_compare --
* Set duplicate comparison routine.
+ *
+ * PUBLIC: int __db_set_dup_compare __P((DB *,
+ * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *)));
*/
-static int
+int
__db_set_dup_compare(dbp, func)
DB *dbp;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
{
int ret;
@@ -900,6 +1096,13 @@ __db_set_flags(dbp, flags)
ENV_REQUIRES_CONFIG(env,
env->tx_handle, "DB_NOT_DURABLE", DB_INIT_TXN);
+ if (dbp->blob_threshold &&
+ LF_ISSET(DB_CHKSUM | DB_ENCRYPT | DB_DUP | DB_DUPSORT)) {
+ __db_errx(dbp->env, DB_STR("0763",
+"Cannot enable checksum, encryption, or duplicates with blob support."));
+ return (EINVAL);
+ }
+
__db_map_flags(dbp, &flags, &dbp->flags);
if ((ret = __bam_set_flags(dbp, &flags)) != 0)
diff --git a/src/db/db_open.c b/src/db/db_open.c
index fefda48f..21074b15 100644
--- a/src/db/db_open.c
+++ b/src/db/db_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -119,6 +119,15 @@ __db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno)
goto err;
/*
+ * Silently disable blobs in databases that cannot support them.
+ * Most illegal configurations will have already been caught; this
+ * is to allow a user to set an environment-wide blob threshold without
+ * having to explicitly turn it off for in-memory or queue databases.
+ */
+ if (!__db_blobs_enabled(dbp))
+ dbp->blob_threshold = 0;
+
+ /*
* If both fname and subname are NULL, it's always a create, so make
* sure that we have both DB_CREATE and a type specified. It would
* be nice if this checking were done in __db_open where most of the
@@ -259,6 +268,11 @@ __db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno)
if (ret != 0)
goto err;
+ if (dbp->blob_file_id != 0)
+ if ((ret = __blob_make_sub_dir(env, &dbp->blob_sub_dir,
+ dbp->blob_file_id, dbp->blob_sdb_id)) != 0)
+ goto err;
+
#ifdef HAVE_PARTITION
if (dbp->p_internal != NULL && (ret =
__partition_open(dbp, ip, txn, fname, type, flags, mode, 1)) != 0)
@@ -432,8 +446,10 @@ err: return (ret);
/*
* __db_chk_meta --
- * Take a buffer containing a meta-data page and check it for a valid LSN,
- * checksum (and verify the checksum if necessary) and possibly decrypt it.
+ * Validate a buffer containing a possible meta-data page. It is
+ * byte-swapped as necessary and checked for having a valid magic number.
+ * If it does, then it can validate the LSN, checksum (if necessary),
+ * and possibly decrypt it.
*
* Return 0 on success, >0 (errno).
*
@@ -447,44 +463,64 @@ __db_chk_meta(env, dbp, meta, flags)
u_int32_t flags;
{
DB_LSN swap_lsn;
- int is_hmac, ret, swapped;
- u_int32_t magic, orig_chk;
+ int is_hmac, needs_swap, ret;
+ u_int32_t magic;
u_int8_t *chksum;
ret = 0;
- swapped = 0;
+ needs_swap = 0;
+ /*
+ * We can verify that this is some kind of db now, before any potential
+ * decryption, because the first P_OVERHEAD() bytes of most pages are
+ * cleartext. This gets called both before and after swapping, so we
+ * need to check for byte swapping ourselves.
+ */
+ magic = meta->magic;
+magic_retry:
+ switch (magic) {
+ case DB_BTREEMAGIC:
+ case DB_HASHMAGIC:
+ case DB_HEAPMAGIC:
+ case DB_QAMMAGIC:
+ case DB_RENAMEMAGIC:
+ break;
+ default:
+ if (needs_swap)
+ /* It's already been swapped, so it isn't a BDB file. */
+ return (EINVAL);
+ M_32_SWAP(magic);
+ needs_swap = 1;
+ goto magic_retry;
+ }
+
+ if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) {
+ swap_lsn = meta->lsn;
+ if (needs_swap) {
+ M_32_SWAP(swap_lsn.file);
+ M_32_SWAP(swap_lsn.offset);
+ }
+ if (!IS_REP_CLIENT(env) && !IS_NOT_LOGGED_LSN(swap_lsn) &&
+ !IS_ZERO_LSN(swap_lsn) && (ret =
+ __log_check_page_lsn(env, dbp, &swap_lsn)) != 0)
+ return (ret);
+ }
if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) {
if (dbp != NULL)
F_SET(dbp, DB_AM_CHKSUM);
-
- is_hmac = meta->encrypt_alg == 0 ? 0 : 1;
- chksum = ((BTMETA *)meta)->chksum;
-
- /*
- * If we need to swap, the checksum function overwrites the
- * original checksum with 0, so we need to save a copy of the
- * original for swapping later.
- */
- orig_chk = *(u_int32_t *)chksum;
-
/*
* We cannot add this to __db_metaswap because that gets done
* later after we've verified the checksum or decrypted.
*/
if (LF_ISSET(DB_CHK_META)) {
- swapped = 0;
-chk_retry: if ((ret =
+ is_hmac = meta->encrypt_alg != 0;
+ chksum = ((BTMETA *)meta)->chksum;
+ if (needs_swap && !is_hmac)
+ M_32_SWAP(*(u_int32_t *)chksum);
+ if ((ret =
__db_check_chksum(env, NULL, env->crypto_handle,
- chksum, meta, DBMETASIZE, is_hmac)) != 0) {
- if (is_hmac || swapped)
- return (DB_CHKSUM_FAIL);
-
- M_32_SWAP(orig_chk);
- swapped = 1;
- *(u_int32_t *)chksum = orig_chk;
- goto chk_retry;
- }
+ chksum, meta, DBMETASIZE, is_hmac)) != 0)
+ return (DB_CHKSUM_FAIL);
}
} else if (dbp != NULL)
F_CLR(dbp, DB_AM_CHKSUM);
@@ -492,44 +528,8 @@ chk_retry: if ((ret =
#ifdef HAVE_CRYPTO
if (__crypto_decrypt_meta(env,
dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META)) != 0)
- ret = DB_CHKSUM_FAIL;
- else
+ ret = DB_CHKSUM_FAIL;
#endif
-
- /* Now that we're decrypted, we can check LSN. */
- if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) {
- /*
- * This gets called both before and after swapping, so we
- * need to check ourselves. If we already swapped it above,
- * we'll know that here.
- */
-
- swap_lsn = meta->lsn;
- magic = meta->magic;
-lsn_retry:
- if (swapped) {
- M_32_SWAP(swap_lsn.file);
- M_32_SWAP(swap_lsn.offset);
- M_32_SWAP(magic);
- }
- switch (magic) {
- case DB_BTREEMAGIC:
- case DB_HASHMAGIC:
- case DB_HEAPMAGIC:
- case DB_QAMMAGIC:
- case DB_RENAMEMAGIC:
- break;
- default:
- if (swapped)
- return (EINVAL);
- swapped = 1;
- goto lsn_retry;
- }
- if (!IS_REP_CLIENT(env) &&
- !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn))
- /* Need to do check. */
- ret = __log_check_page_lsn(env, dbp, &swap_lsn);
- }
return (ret);
}
@@ -598,7 +598,6 @@ swap_retry:
}
/*
- * We can only check the meta page if we are sure we have a meta page.
* If it is random data, then this check can fail. So only now can we
* checksum and decrypt. Don't distinguish between configuration and
* checksum match errors here, because we haven't opened the database
@@ -606,9 +605,9 @@ swap_retry:
* If DB_SKIP_CHK is set, it means the checksum was already checked
* and the page was already decrypted.
*/
- if (!LF_ISSET(DB_SKIP_CHK) &&
+ if (!LF_ISSET(DB_SKIP_CHK) &&
(ret = __db_chk_meta(env, dbp, meta, flags)) != 0) {
- if (ret == DB_CHKSUM_FAIL)
+ if (ret == DB_CHKSUM_FAIL)
__db_errx(env, DB_STR_A("0640",
"%s: metadata page checksum error", "%s"), name);
goto bad_format;
@@ -669,10 +668,9 @@ swap_retry:
}
if (FLD_ISSET(meta->metaflags,
- DBMETA_PART_RANGE | DBMETA_PART_CALLBACK))
- if ((ret =
- __partition_init(dbp, meta->metaflags)) != 0)
- return (ret);
+ DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
+ (ret = __partition_init(dbp, meta->metaflags)) != 0)
+ return (ret);
return (0);
bad_format:
diff --git a/src/db/db_overflow.c b/src/db/db_overflow.c
index d992ec0d..22f349ed 100644
--- a/src/db/db_overflow.c
+++ b/src/db/db_overflow.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -58,39 +58,26 @@
*/
/*
- * __db_goff --
- * Get an offpage item.
+ * __db_alloc_dbt
*
- * PUBLIC: int __db_goff __P((DBC *,
- * PUBLIC: DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
+ * Allocate enough space in the dbt to hold the data. Also used by the
+ * blob file API.
+ *
+ * PUBLIC: int __db_alloc_dbt __P((ENV *, DBT *, u_int32_t, u_int32_t *,
+ * PUBLIC: u_int32_t *, void **, u_int32_t *));
*/
int
-__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
- DBC *dbc;
+__db_alloc_dbt(env, dbt, tlen, nd, st, bpp, bpsz)
+ ENV *env;
DBT *dbt;
u_int32_t tlen;
- db_pgno_t pgno;
+ u_int32_t *nd;
+ u_int32_t *st;
void **bpp;
u_int32_t *bpsz;
{
- DB *dbp;
- DB_MPOOLFILE *mpf;
- DB_TXN *txn;
- DBC_INTERNAL *cp;
- ENV *env;
- PAGE *h;
- DB_THREAD_INFO *ip;
- db_indx_t bytes;
- u_int32_t curoff, needed, start;
- u_int8_t *p, *src;
int ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- env = dbp->env;
- ip = dbc->thread_info;
- mpf = dbp->mpf;
- txn = dbc->txn;
+ u_int32_t needed, start;
/*
* Check if the buffer is big enough; if it is not and we are
@@ -110,6 +97,8 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
start = 0;
needed = tlen;
}
+ *nd = needed;
+ *st = start;
/*
* If the caller has not requested any data, return success. This
@@ -123,7 +112,7 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
}
if (F_ISSET(dbt, DB_DBT_USERCOPY))
- goto skip_alloc;
+ return (0);
/* Allocate any necessary memory. */
if (F_ISSET(dbt, DB_DBT_USERMEM)) {
@@ -152,7 +141,48 @@ __db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
return (DB_BUFFER_SMALL);
}
-skip_alloc:
+ return (0);
+}
+
+/*
+ * __db_goff --
+ * Get an offpage item.
+ *
+ * PUBLIC: int __db_goff __P((DBC *,
+ * PUBLIC: DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
+ */
+int
+__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
+ DBC *dbc;
+ DBT *dbt;
+ u_int32_t tlen;
+ db_pgno_t pgno;
+ void **bpp;
+ u_int32_t *bpsz;
+{
+ DB *dbp;
+ DB_MPOOLFILE *mpf;
+ DB_TXN *txn;
+ DBC_INTERNAL *cp;
+ ENV *env;
+ PAGE *h;
+ DB_THREAD_INFO *ip;
+ db_indx_t bytes;
+ u_int32_t curoff, needed, start;
+ u_int8_t *p, *src;
+ int ret;
+
+ dbp = dbc->dbp;
+ cp = dbc->internal;
+ env = dbp->env;
+ ip = dbc->thread_info;
+ mpf = dbp->mpf;
+ txn = dbc->txn;
+
+ if (((ret = __db_alloc_dbt(
+ env, dbt, tlen, &needed, &start, bpp, bpsz)) != 0) || needed == 0)
+ return (ret);
+
/* Set up a start page in the overflow chain if streaming. */
if (cp->stream_start_pgno != PGNO_INVALID &&
pgno == cp->stream_start_pgno && start >= cp->stream_off &&
@@ -485,28 +515,33 @@ __db_doff(dbc, pgno)
/*
* __db_moff --
- * Match on overflow pages.
+ * Match on overflow pages from a specific offset.
*
- * Given a starting page number and a key, return <0, 0, >0 to indicate if the
- * key on the page is less than, equal to or greater than the key specified.
- * We optimize this by doing chunk at a time comparison unless the user has
- * specified a comparison function. In this case, we need to materialize
- * the entire object and call their comparison routine.
+ * Given a starting page number and a key, store <0, 0, >0 in 'cmpp' to indicate
+ * if the key on the page is less than, equal to or greater than the key
+ * specified. We optimize this by doing a chunk at a time comparison unless the
+ * user has specified a comparison function. In this case, we need to
+ * materialize the entire object and call their comparison routine.
+ *
+ * We start the comparison at an offset and update the offset with the
+ * longest matching count after the comparison.
*
* __db_moff and __db_coff are generic functions useful in searching and
* ordering off page items. __db_moff matches an overflow DBT with an offpage
* item. __db_coff compares two offpage items for lexicographic sort order.
*
* PUBLIC: int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *),
+ * PUBLIC: int *, size_t *));
*/
int
-__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
+__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp, locp)
DBC *dbc;
const DBT *dbt;
db_pgno_t pgno;
u_int32_t tlen;
- int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
+ int (*cmpfunc) __P((DB *, const DBT *, const DBT *, size_t *)), *cmpp;
+ size_t *locp;
{
DB *dbp;
DBT local_dbt;
@@ -517,6 +552,7 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
u_int32_t bufsize, cmp_bytes, key_left;
u_int8_t *p1, *p2;
int ret;
+ size_t pos, start;
dbp = dbc->dbp;
ip = dbc->thread_info;
@@ -535,39 +571,76 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
&local_dbt, tlen, pgno, &buf, &bufsize)) != 0)
return (ret);
/* Pass the key as the first argument */
- *cmpp = cmpfunc(dbp, dbt, &local_dbt);
+ *cmpp = cmpfunc(dbp, dbt, &local_dbt, NULL);
__os_free(dbp->env, buf);
return (0);
}
+ /*
+ * We start the comparison from the location of 'locp' and store the
+ * last matching location into 'locp'.
+ */
+ start = (locp == NULL ? 0 : *locp);
+ pos = 0;
+
+ /* Subtract prefix length from lengths. */
+ tlen -= (u_int32_t)start;
+ key_left = dbt->size - (u_int32_t)start;
+ p1 = (u_int8_t *)dbt->data + start;
+
/* While there are both keys to compare. */
- for (*cmpp = 0, p1 = dbt->data,
- key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
+ for (*cmpp = 0; key_left > 0 &&
+ tlen > 0 && pgno != PGNO_INVALID;) {
if ((ret =
__memp_fget(mpf, &pgno, ip, dbc->txn, 0, &pagep)) != 0)
return (ret);
- cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
- tlen -= cmp_bytes;
- key_left -= cmp_bytes;
- for (p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp);
- cmp_bytes-- > 0; ++p1, ++p2)
- if (*p1 != *p2) {
- *cmpp = (long)*p1 - (long)*p2;
- break;
+ /*
+ * Figure out where to start comparison, and how many
+ * bytes to compare.
+ */
+ if (pos >= start) {
+ p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp);
+ cmp_bytes = OV_LEN(pagep);
+ } else if (pos + OV_LEN(pagep) > start) {
+ p2 = (u_int8_t *)pagep +
+ P_OVERHEAD(dbp) + (start - pos);
+ cmp_bytes = OV_LEN(pagep) - (u_int32_t)(start - pos);
+ } else {
+ p2 = NULL;
+ cmp_bytes = 0;
+ }
+
+ pos += OV_LEN(pagep);
+
+ if (cmp_bytes != 0) {
+ if (cmp_bytes > key_left)
+ cmp_bytes = key_left;
+ tlen -= cmp_bytes;
+ key_left -= cmp_bytes;
+ for (;cmp_bytes-- > 0; ++p1, ++p2) {
+ if (*p1 != *p2) {
+ *cmpp = (long)*p1 - (long)*p2;
+ break;
+ }
+ if (locp != NULL)
+ ++(*locp);
}
+
+ }
pgno = NEXT_PGNO(pagep);
if ((ret = __memp_fput(mpf, ip, pagep, dbp->priority)) != 0)
return (ret);
if (*cmpp != 0)
return (0);
}
- if (key_left > 0) /* DBT is longer than the page key. */
- *cmpp = 1;
- else if (tlen > 0) /* DBT is shorter than the page key. */
- *cmpp = -1;
- else
- *cmpp = 0;
+
+ if (*cmpp == 0) {
+ if (key_left > 0) /* DBT is longer than the page key. */
+ *cmpp = 1;
+ else if (tlen > 0) /* DBT is shorter than the page key. */
+ *cmpp = -1;
+ }
return (0);
}
@@ -587,13 +660,13 @@ __db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
* DBT type.
*
* PUBLIC: int __db_coff __P((DBC *, const DBT *, const DBT *,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *), int *));
*/
int
__db_coff(dbc, dbt, match, cmpfunc, cmpp)
DBC *dbc;
const DBT *dbt, *match;
- int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
+ int (*cmpfunc) __P((DB *, const DBT *, const DBT *, size_t *)), *cmpp;
{
DB *dbp;
DB_THREAD_INFO *ip;
@@ -643,7 +716,7 @@ __db_coff(dbc, dbt, match, cmpfunc, cmpp)
match_pgno, &match_buf, &match_bufsz)) != 0)
goto err1;
/* The key needs to be the first argument for sort order */
- *cmpp = cmpfunc(dbp, &local_key, &local_match);
+ *cmpp = cmpfunc(dbp, &local_key, &local_match, NULL);
err1: if (dbt_buf != NULL)
__os_free(dbp->env, dbt_buf);
@@ -657,6 +730,7 @@ err1: if (dbt_buf != NULL)
if ((ret =
__memp_fget(mpf, &dbt_pgno, ip, txn, 0, &dbt_pagep)) != 0)
return (ret);
+ DB_ASSERT(dbc->env, TYPE(dbt_pagep) == P_OVERFLOW);
if ((ret =
__memp_fget(mpf, &match_pgno,
ip, txn, 0, &match_pagep)) != 0) {
@@ -664,6 +738,7 @@ err1: if (dbt_buf != NULL)
mpf, ip, dbt_pagep, DB_PRIORITY_UNCHANGED);
return (ret);
}
+ DB_ASSERT(dbc->env, TYPE(match_pagep) == P_OVERFLOW);
cmp_bytes = page_space < max_data ? page_space : max_data;
for (p1 = (u_int8_t *)dbt_pagep + P_OVERHEAD(dbp),
p2 = (u_int8_t *)match_pagep + P_OVERHEAD(dbp);
diff --git a/src/db/db_ovfl_vrfy.c b/src/db/db_ovfl_vrfy.c
index fa630f7b..55eb2b70 100644
--- a/src/db/db_ovfl_vrfy.c
+++ b/src/db/db_ovfl_vrfy.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
diff --git a/src/db/db_pr.c b/src/db/db_pr.c
index d95440f9..4933498e 100644
--- a/src/db/db_pr.c
+++ b/src/db/db_pr.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -11,6 +11,7 @@
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
+#include "dbinc/fop.h"
#include "dbinc/hash.h"
#include "dbinc/heap.h"
#include "dbinc/mp.h"
@@ -25,6 +26,11 @@ static int __db_hmeta __P((ENV *, DB *, HMETA *, u_int32_t));
static void __db_meta __P((ENV *, DB *, DBMETA *, FN const *, u_int32_t));
static void __db_proff __P((ENV *, DB_MSGBUF *, void *));
static int __db_qmeta __P((ENV *, DB *, QMETA *, u_int32_t));
+static int __db_prblob __P((DBC *, DBT *, DBT *, int, const char *,
+ void *, int (*callback) __P((void *, const void *)), int, int));
+static int __db_prblob_id __P((DB *, db_seq_t,
+ off_t, DBT *, int, const char *, void *,
+ int (*callback) __P((void *, const void *))));
#ifdef HAVE_STATISTICS
static void __db_prdb __P((DB *, u_int32_t));
static int __db_prtree __P((DB *, DB_TXN *,
@@ -515,6 +521,11 @@ __db_bmeta(env, dbp, h, flags)
__db_msg(env, "\tre_len: %#lx re_pad: %#lx",
(u_long)h->re_len, (u_long)h->re_pad);
__db_msg(env, "\troot: %lu", (u_long)h->root);
+ __db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+ __db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+ __db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
+ __db_msg(env, "\tblob_sdb_lo: %lu", (u_long)h->blob_sdb_lo);
+ __db_msg(env, "\tblob_sdb_hi: %lu", (u_long)h->blob_sdb_hi);
return (0);
}
@@ -549,6 +560,11 @@ __db_hmeta(env, dbp, h, flags)
__db_msg(env, "\tffactor: %lu", (u_long)h->ffactor);
__db_msg(env, "\tnelem: %lu", (u_long)h->nelem);
__db_msg(env, "\th_charkey: %#lx", (u_long)h->h_charkey);
+ __db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+ __db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+ __db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
+ __db_msg(env, "\tblob_sdb_lo: %lu", (u_long)h->blob_sdb_lo);
+ __db_msg(env, "\tblob_sdb_hi: %lu", (u_long)h->blob_sdb_hi);
__db_msgadd(env, &mb, "\tspare points:\n\t");
for (i = 0; i < NCACHED; i++) {
__db_msgadd(env, &mb, "%lu (%lu) ", (u_long)h->spares[i],
@@ -604,6 +620,9 @@ __db_heapmeta(env, dbp, h, flags)
__db_msg(env, "\tnregions: %lu", (u_long)h->nregions);
__db_msg(env, "\tgbytes: %lu", (u_long)h->gbytes);
__db_msg(env, "\tbytes: %lu", (u_long)h->bytes);
+ __db_msg(env, "\tblob_threshold: %lu", (u_long)h->blob_threshold);
+ __db_msg(env, "\tblob_file_lo: %lu", (u_long)h->blob_file_lo);
+ __db_msg(env, "\tblob_file_hi: %lu", (u_long)h->blob_file_hi);
return (0);
}
@@ -682,14 +701,19 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
{
BINTERNAL *bi;
BKEYDATA *bk;
+ BBLOB bl;
HOFFPAGE a_hkd;
+ HBLOB hblob;
QAMDATA *qp, *qep;
RINTERNAL *ri;
HEAPHDR *hh;
HEAPSPLITHDR *hs;
+ HEAPBLOBHDR bhdr;
db_indx_t dlen, len, i, *inp, max;
db_pgno_t pgno;
db_recno_t recno;
+ off_t blob_size;
+ db_seq_t blob_id;
u_int32_t qlen;
u_int8_t *ep, *hk, *p;
int deleted, ret;
@@ -899,6 +923,23 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
(u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
DB_MSGBUF_FLUSH(env, mbp);
break;
+ case H_BLOB:
+ memcpy(&hblob, hk, HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ __db_msgadd(env, mbp, "blob: id: %llu ",
+ (long long)blob_id);
+ GET_BLOB_SIZE(env, hblob, blob_size, ret);
+ if (ret != 0)
+ __db_msgadd(env, mbp,
+ "blob: blob_size overflow. ");
+ __db_msgadd(env, mbp, "blob: size: %llu",
+ (long long)blob_size);
+ /*
+ * No point printing the blob file, it is
+ * likely not readable by humans.
+ */
+ DB_MSGBUF_FLUSH(env, mbp);
+ break;
default:
DB_MSGBUF_FLUSH(env, mbp);
__db_msg(env, "ILLEGAL HASH PAGE TYPE: %lu",
@@ -925,6 +966,7 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
__db_proff(env, mbp, bi->data);
break;
default:
+ /* B_BLOB does not appear on internal pages. */
DB_MSGBUF_FLUSH(env, mbp);
__db_msg(env, "ILLEGAL BINTERNAL TYPE: %lu",
(u_long)B_TYPE(bi->type));
@@ -950,6 +992,19 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
case B_OVERFLOW:
__db_proff(env, mbp, bk);
break;
+ case B_BLOB:
+ memcpy(&bl, bk, BBLOB_SIZE);
+ blob_id = (db_seq_t)bl.id;
+ __db_msgadd(env, mbp, "blob: id: %llu ",
+ (long long)blob_id);
+ GET_BLOB_SIZE(env, bl, blob_size, ret);
+ if (ret != 0)
+ __db_msgadd(env, mbp,
+ "blob: blob_size overflow. ");
+ __db_msgadd(env, mbp, "blob: size: %llu",
+ (long long)blob_size);
+ DB_MSGBUF_FLUSH(env, mbp);
+ break;
default:
DB_MSGBUF_FLUSH(env, mbp);
__db_msg(env,
@@ -961,9 +1016,27 @@ __db_prpage_int(env, mbp, dbp, lead, h, pagesize, data, flags)
break;
case P_HEAP:
hh = sp;
- if (!F_ISSET(hh,HEAP_RECSPLIT))
+ if (!F_ISSET(hh,HEAP_RECSPLIT) &&
+ !F_ISSET(hh, HEAP_RECBLOB))
hdata = (u_int8_t *)hh + sizeof(HEAPHDR);
- else {
+ else if (F_ISSET(hh, HEAP_RECBLOB)) {
+ memcpy(&bhdr, hh, HEAPBLOBREC_SIZE);
+ blob_id = (db_seq_t)bhdr.id;
+ __db_msgadd(env, mbp, "blob: id: %llu ",
+ (long long)blob_id);
+ GET_BLOB_SIZE(env, bhdr, blob_size, ret);
+ if (ret != 0)
+ __db_msgadd(env, mbp,
+ "blob: blob_size overflow. ");
+ __db_msgadd(env, mbp, "blob: size: %llu",
+ (long long)blob_size);
+ /*
+ * No point printing the blob file, it is
+ * likely not readable by humans.
+ */
+ DB_MSGBUF_FLUSH(env, mbp);
+ break;
+ } else {
hs = sp;
__db_msgadd(env, mbp,
"split: 0x%02x tsize: %lu next: %lu.%lu ",
@@ -1276,10 +1349,16 @@ __db_dump(dbp, subname, callback, handle, pflag, keyflag)
ENV *env;
db_recno_t recno;
int is_recno, is_heap, ret, t_ret;
+ u_int32_t blob_threshold;
void *pointer;
env = dbp->env;
is_heap = 0;
+ memset(&dataret, 0, sizeof(DBT));
+ memset(&keyret, 0, sizeof(DBT));
+
+ if ((ret = __db_get_blob_threshold(dbp, &blob_threshold)) != 0)
+ return (ret);
if ((ret = __db_prheader(
dbp, subname, pflag, keyflag, handle, callback, NULL, 0)) != 0)
@@ -1317,8 +1396,8 @@ retry: while ((ret =
!is_heap ? DB_NEXT | DB_MULTIPLE_KEY : DB_NEXT )) == 0) {
if (is_heap) {
/* Never dump keys for HEAP */
- if ((ret = __db_prdbt(
- &data, pflag, " ", handle, callback, 0, 0)) != 0)
+ if ((ret = __db_prdbt(&data,
+ pflag, " ", handle, callback, 0, 0, 0)) != 0)
goto err;
continue;
}
@@ -1337,17 +1416,24 @@ retry: while ((ret =
if ((keyflag &&
(ret = __db_prdbt(&keyret, pflag, " ",
- handle, callback, is_recno, 0)) != 0) ||
+ handle, callback, is_recno, 0, 0)) != 0) ||
(ret = __db_prdbt(&dataret, pflag, " ",
- handle, callback, 0, 0)) != 0)
+ handle, callback, 0, 0, 0)) != 0)
goto err;
}
}
if (ret == DB_BUFFER_SMALL) {
- data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
- if ((ret = __os_realloc(env, data.size, &data.data)) != 0)
- goto err;
- data.ulen = data.size;
+ if (blob_threshold != 0 && data.size >= blob_threshold) {
+ if ((ret = __db_prblob(dbcp, &key, &data, pflag,
+ " ", handle, callback, is_heap, keyflag)) != 0)
+ goto err;
+ } else {
+ data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
+ if ((ret = __os_realloc(
+ env, data.size, &data.data)) != 0)
+ goto err;
+ data.ulen = data.size;
+ }
goto retry;
}
if (ret == DB_NOTFOUND)
@@ -1365,14 +1451,153 @@ err: if ((t_ret = __dbc_close(dbcp)) != 0 && ret == 0)
}
/*
+ * __db_prblob --
+ *	Print the blob item following the cursor's position and advance the
+ *	cursor past it.  If the item turns out not to be a blob, grow the
+ *	caller's buffer instead and return 0 without printing.
+ */
+static int
+__db_prblob(dbc, key, data, checkprint,
+    prefix, handle, callback, is_heap, keyflag)
+	DBC *dbc;
+	DBT *key;
+	DBT *data;
+	int checkprint;
+	const char *prefix;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+	int is_heap;
+	int keyflag;
+{
+	DBC *local;
+	DBT partial;
+	int ret, t_ret;
+	off_t blob_size;
+	db_seq_t blob_id;
+
+	local = NULL;
+	memset(&partial, 0, sizeof(DBT));
+	partial.flags = DB_DBT_PARTIAL;
+
+	if ((ret = __dbc_idup(dbc, &local, DB_POSITION)) != 0)
+		goto err;
+
+	/* Move the duplicate to the blob; close it via err on failure. */
+	if ((ret = __dbc_get(local, key, &partial, DB_NEXT)) != 0)
+		goto err;
+
+	if ((ret = __dbc_get_blob_id(local, &blob_id)) != 0) {
+		/*
+		 * It is possible this is not a blob.  Non-blob items that are
+		 * larger than the blob threshold can exist if the item was
+		 * smaller than the threshold when created, then later updated
+		 * to larger than the threshold value.
+		 */
+		if (ret == EINVAL) {
+			data->size = (u_int32_t)DB_ALIGN(data->size, 1024);
+			if ((ret = __os_realloc(
+			    dbc->env, data->size, &data->data)) != 0)
+				goto err;
+			data->ulen = data->size;
+		}
+		goto err;
+	}
+
+	/* Grow via __os_realloc, as the non-blob path above does. */
+	if (data->ulen < MEGABYTE) {
+		if ((ret = __os_realloc(
+		    dbc->env, MEGABYTE, &data->data)) != 0)
+			goto err;
+		data->ulen = MEGABYTE;
+	}
+
+	if ((ret = __dbc_get_blob_size(local, &blob_size)) != 0)
+		goto err;
+
+	if (keyflag && !is_heap && (ret = __db_prdbt(
+	    key, checkprint, " ", handle, callback, 0, 0, 0)) != 0)
+		goto err;
+
+	if ((ret = __db_prblob_id(local->dbp, blob_id, blob_size,
+	    data, checkprint, prefix, handle, callback)) != 0)
+		goto err;
+
+	/* Move the cursor. */
+	ret = __dbc_get(dbc, key, &partial, DB_NEXT);
+
+err:	if (local != NULL &&
+	    (t_ret = __dbc_close(local)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
+ * __db_prblob_id --
+ *	Stream the blob file with the given id to the callback, one
+ *	buffer-sized piece at a time, until blob_size bytes are consumed.
+ *	Returns 0 on success or the first error encountered; the file
+ *	handle, once opened, is closed on every path.
+ */
+static int
+__db_prblob_id(dbp, blob_id,
+    blob_size, data, checkprint, prefix, handle, callback)
+	DB *dbp;
+	db_seq_t blob_id;
+	off_t blob_size;
+	DBT *data;
+	int checkprint;
+	const char *prefix;
+	void *handle;
+	int (*callback) __P((void *, const void *));
+{
+	DB_FH *fhp;
+	int is_partial, ret, t_ret;
+	off_t offset, remaining;
+
+	fhp = NULL;
+
+	if ((ret = __blob_file_open(
+	    dbp, &fhp, blob_id, DB_FOP_READONLY, 1)) != 0)
+		goto err;
+
+	/* A blob_size of zero or less prints nothing at all. */
+	for (offset = 0, remaining = blob_size; remaining > 0;) {
+		if ((ret = __blob_file_read(
+		    dbp->env, fhp, data, offset, data->ulen)) != 0)
+			goto err;
+
+		/* Only the last piece is followed by a newline. */
+		is_partial = data->size < remaining;
+
+		/* The prefix is passed only with the first piece. */
+		if ((ret = __db_prdbt(data, checkprint,
+		    offset == 0 ? prefix : NULL,
+		    handle, callback, 0, 0, is_partial)) != 0)
+			goto err;
+
+		remaining = data->size > remaining ?
+		    0 : remaining - data->size;
+		offset += data->size;
+	}
+
+err:	if (fhp != NULL &&
+	    (t_ret = __os_closehandle(dbp->env, fhp)) != 0 && ret == 0)
+		ret = t_ret;
+
+	return (ret);
+}
+
+/*
* __db_prdbt --
* Print out a DBT data element.
*
* PUBLIC: int __db_prdbt __P((DBT *, int, const char *, void *,
- * PUBLIC: int (*)(void *, const void *), int, int));
+ * PUBLIC: int (*)(void *, const void *), int, int, int));
*/
int
-__db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
+__db_prdbt(dbtp, checkprint,
+ prefix, handle, callback, is_recno, is_heap, no_newline)
DBT *dbtp;
int checkprint;
const char *prefix;
@@ -1380,16 +1605,17 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
int (*callback) __P((void *, const void *));
int is_recno;
int is_heap;
+ int no_newline;
{
- static const u_char hex[] = "0123456789abcdef";
db_recno_t recno;
DB_HEAP_RID rid;
- size_t len;
+ size_t count, len;
int ret;
+ u_int8_t *p;
#define DBTBUFLEN 100
- u_int8_t *p, *hp;
- char buf[DBTBUFLEN], hbuf[DBTBUFLEN];
+ char buf[DBTBUFLEN], hexbuf[2 * DBTBUFLEN + 1];
+ ret = 0;
/*
* !!!
* This routine is the routine that dumps out items in the format
@@ -1409,13 +1635,8 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
/* If we're printing data as hex, print keys as hex too. */
if (!checkprint) {
- for (len = strlen(buf), p = (u_int8_t *)buf,
- hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
- *hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
- *hp++ = hex[*p & 0x0f];
- }
- *hp = '\0';
- ret = callback(handle, hbuf);
+ (void)__db_tohex(buf, strlen(buf), hexbuf);
+ ret = callback(handle, hexbuf);
} else
ret = callback(handle, buf);
@@ -1433,44 +1654,46 @@ __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, is_heap)
/* If we're printing data as hex, print keys as hex too. */
if (!checkprint) {
- for (len = strlen(buf), p = (u_int8_t *)buf,
- hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
- *hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
- *hp++ = hex[*p & 0x0f];
- }
- *hp = '\0';
- ret = callback(handle, hbuf);
+ (void)__db_tohex(buf, strlen(buf), hexbuf);
+ ret = callback(handle, hexbuf);
} else
ret = callback(handle, buf);
if (ret != 0)
return (ret);
} else if (checkprint) {
+ /*
+ * Prepare buf for the 'isprint()' case: printable single char
+ * strings; prepare hexbuf for the other case '\<2 hex digits>'.
+ */
+ buf[1] = '\0';
+ hexbuf[0] = '\\';
for (len = dbtp->size, p = dbtp->data; len--; ++p)
if (isprint((int)*p)) {
if (*p == '\\' &&
(ret = callback(handle, "\\")) != 0)
return (ret);
- snprintf(buf, DBTBUFLEN, "%c", *p);
+ buf[0] = (char)*p;
if ((ret = callback(handle, buf)) != 0)
return (ret);
} else {
- snprintf(buf, DBTBUFLEN, "\\%c%c",
- hex[(u_int8_t)(*p & 0xf0) >> 4],
- hex[*p & 0x0f]);
- if ((ret = callback(handle, buf)) != 0)
+ (void)__db_tohex(p, 1, hexbuf + 1);
+ if ((ret = callback(handle, hexbuf)) != 0)
return (ret);
}
} else
- for (len = dbtp->size, p = dbtp->data; len--; ++p) {
- snprintf(buf, DBTBUFLEN, "%c%c",
- hex[(u_int8_t)(*p & 0xf0) >> 4],
- hex[*p & 0x0f]);
- if ((ret = callback(handle, buf)) != 0)
+ for (len = dbtp->size, p = dbtp->data, count = DBTBUFLEN;
+ len > 0; len -= count, p += count) {
+ if (count > len)
+ count = len;
+ (void)__db_tohex(p, count, hexbuf);
+ if ((ret = callback(handle, hexbuf)) != 0)
return (ret);
}
-
- return (callback(handle, "\n"));
+ if (no_newline == 0)
+ return (callback(handle, "\n"));
+ else
+ return (ret);
}
/*
@@ -1598,7 +1821,7 @@ __db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
goto err;
DB_INIT_DBT(dbt, subname, strlen(subname));
if ((ret = __db_prdbt(&dbt, 1,
- NULL, handle, callback, 0, 0)) != 0)
+ NULL, handle, callback, 0, 0, 0)) != 0)
goto err;
}
switch (dbtype) {
@@ -1868,7 +2091,7 @@ __db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
goto err;
for (i = 0; i < tmp_u_int32 - 1; i++)
if ((ret = __db_prdbt(&keys[i],
- pflag, " ", handle, callback, 0, 0)) != 0)
+ pflag, " ", handle, callback, 0, 0, 0)) != 0)
goto err;
}
}
@@ -1954,3 +2177,33 @@ __db_dbtype_to_string(type)
}
return ("UNKNOWN TYPE");
}
+
+/*
+ * __db_tohex --
+ *	Render len bytes starting at source as lowercase hexadecimal text
+ *	in dest, two characters per byte, and return dest.  A '\0' is
+ *	always appended, so dest must hold at least 2*len + 1 bytes.
+ *
+ * PUBLIC: char *__db_tohex __P((const void *, size_t, char *));
+ */
+char *
+__db_tohex(source, len, dest)
+	const void *source;
+	size_t len;
+	char *dest;
+{
+	static const char hex[] = "0123456789abcdef";
+	const u_int8_t *bytes;
+	size_t i;
+
+	bytes = source;
+	for (i = 0; i < len; i++) {
+		/* High nibble first, then low nibble. */
+		dest[2 * i] = hex[bytes[i] >> 4];
+		dest[2 * i + 1] = hex[bytes[i] & 0x0f];
+	}
+
+	/* Always terminate, even for an empty input. */
+	dest[2 * len] = '\0';
+	return (dest);
+}
diff --git a/src/db/db_rec.c b/src/db/db_rec.c
index 8ba1124e..98b29b22 100644
--- a/src/db/db_rec.c
+++ b/src/db/db_rec.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -1194,8 +1194,9 @@ __db_pg_init_recover(env, dbtp, lsnp, op, info)
DB_LSN copy_lsn;
DB_MPOOLFILE *mpf;
PAGE *pagep;
- int cmp_n, cmp_p, ret, type;
+ int cmp_n, cmp_p, ret, t_ret, type;
+ pagep = NULL;
ip = ((DB_TXNHEAD *)info)->thread_info;
REC_PRINT(__db_pg_init_print);
REC_INTRO(__db_pg_init_read, ip, 0);
@@ -1247,11 +1248,12 @@ __db_pg_init_recover(env, dbtp, lsnp, op, info)
memcpy((u_int8_t*)pagep + HOFFSET(pagep),
argp->data.data, argp->data.size);
}
- if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
done: *lsnp = argp->prev_lsn;
out:
+ if (pagep != NULL && (t_ret =
+ __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0 && ret == 0)
+ ret = t_ret;
REC_CLOSE;
}
diff --git a/src/db/db_reclaim.c b/src/db/db_reclaim.c
index b902769a..abae33d9 100644
--- a/src/db/db_reclaim.c
+++ b/src/db/db_reclaim.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -181,6 +181,7 @@ __db_truncate_callback(dbc, p, cookie, putp)
switch (*H_PAIRDATA(dbp, p, indx)) {
case H_OFFDUP:
break;
+ case H_BLOB:
case H_OFFPAGE:
case H_KEYDATA:
++*countp;
diff --git a/src/db/db_remove.c b/src/db/db_remove.c
index 591a29b2..d6118fae 100644
--- a/src/db/db_remove.c
+++ b/src/db/db_remove.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -18,7 +18,7 @@
#include "dbinc/txn.h"
static int __db_dbtxn_remove __P((DB *,
- DB_THREAD_INFO *, DB_TXN *, const char *, const char *));
+ DB_THREAD_INFO *, DB_TXN *, const char *, const char *, APPNAME));
static int __db_subdb_remove __P((DB *,
DB_THREAD_INFO *, DB_TXN *, const char *, const char *, u_int32_t));
@@ -264,7 +264,7 @@ __db_remove_int(dbp, ip, txn, name, subdb, flags)
/* Handle transactional file removes separately. */
if (IS_REAL_TXN(txn)) {
- ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb);
+ ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb, DB_APP_DATA);
goto err;
}
@@ -293,6 +293,10 @@ __db_remove_int(dbp, ip, txn, name, subdb, flags)
(ret = dbp->db_am_remove(dbp, ip, NULL, name, subdb, flags)) != 0)
goto err;
+ if (dbp->db_am_remove == NULL &&
+ (ret = __blob_del_all(dbp, txn, 0)) != 0)
+ goto err;
+
ret = F_ISSET(dbp, DB_AM_INMEM) ?
__db_inmem_remove(dbp, NULL, real_name) :
__fop_remove(env,
@@ -407,6 +411,10 @@ __db_subdb_remove(dbp, ip, txn, name, subdb, flags)
txn, name, subdb, DB_UNKNOWN, DB_WRITEOPEN, 0, PGNO_BASE_MD)) != 0)
goto err;
+ if (sdbp->blob_threshold != 0)
+ if ((ret = __blob_del_all(sdbp, txn, 0)) != 0)
+ goto err;
+
DB_TEST_RECOVERY(sdbp, DB_TEST_PREDESTROY, ret, name);
/* Have the handle locked so we will not lock pages. */
@@ -460,18 +468,21 @@ err:
}
static int
-__db_dbtxn_remove(dbp, ip, txn, name, subdb)
+__db_dbtxn_remove(dbp, ip, txn, name, subdb, appname)
DB *dbp;
DB_THREAD_INFO *ip;
DB_TXN *txn;
const char *name, *subdb;
+ APPNAME appname;
{
ENV *env;
int ret;
char *tmpname;
+ u_int32_t flags;
env = dbp->env;
tmpname = NULL;
+ flags = DB_NOSYNC;
/*
* This is a transactional remove, so we have to keep the name
@@ -488,7 +499,12 @@ __db_dbtxn_remove(dbp, ip, txn, name, subdb)
DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);
if ((ret = __db_rename_int(dbp,
- txn->thread_info, txn, name, subdb, tmpname, DB_NOSYNC)) != 0)
+ txn->thread_info, txn, name, subdb, tmpname, flags)) != 0)
+ goto err;
+
+ /* Delete all blob files, if this database supports blobs. */
+ if (appname != DB_APP_BLOB && (dbp->blob_file_id != 0 ||
+ dbp->blob_sdb_id != 0) && (ret = __blob_del_all(dbp, txn, 0)) != 0)
goto err;
/*
@@ -501,7 +517,7 @@ __db_dbtxn_remove(dbp, ip, txn, name, subdb)
ret = F_ISSET(dbp, DB_AM_INMEM) ?
__db_inmem_remove(dbp, txn, tmpname) :
__fop_remove(env,
- txn, dbp->fileid, tmpname, &dbp->dirname, DB_APP_DATA,
+ txn, dbp->fileid, tmpname, &dbp->dirname, appname,
F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);
diff --git a/src/db/db_rename.c b/src/db/db_rename.c
index 2812b948..5b2bed42 100644
--- a/src/db/db_rename.c
+++ b/src/db/db_rename.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -285,10 +285,11 @@ __db_rename_int(dbp, ip, txn, name, subdb, newname, flags)
* taken care of in the fop layer.
*/
if (IS_REAL_TXN(txn)) {
- if ((ret = __fop_dummy(dbp, txn, old, newname)) != 0)
+ if ((ret =
+ __fop_dummy(dbp, txn, old, newname, DB_APP_DATA)) != 0)
goto err;
} else {
- if ((ret = __fop_dbrename(dbp, old, newname)) != 0)
+ if ((ret = __fop_dbrename(dbp, old, newname, DB_APP_DATA)) != 0)
goto err;
}
diff --git a/src/db/db_ret.c b/src/db/db_ret.c
index 709605f6..ddd0ef51 100644
--- a/src/db/db_ret.c
+++ b/src/db/db_ret.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -29,18 +29,27 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
void **memp;
u_int32_t *memsize;
{
+ BBLOB bl;
BKEYDATA *bk;
BOVERFLOW *bo;
DB *dbp;
+ ENV *env;
+ HBLOB hblob;
+ HEAPBLOBHDR bhdr;
HEAPHDR *hdr;
+ db_seq_t blob_id;
+ int ret;
HOFFPAGE ho;
+ off_t blob_size;
u_int32_t len;
u_int8_t *hk;
void *data;
if (F_ISSET(dbt, DB_DBT_READONLY))
return (0);
+ ret = 0;
dbp = dbc->dbp;
+ env = dbp->env;
switch (TYPE(h)) {
case P_HASH_UNSORTED:
@@ -50,6 +59,20 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
memcpy(&ho, hk, sizeof(HOFFPAGE));
return (__db_goff(dbc, dbt,
ho.tlen, ho.pgno, memp, memsize));
+ } else if (HPAGE_PTYPE(hk) == H_BLOB) {
+ /* Get the record instead of the blob item. */
+ if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+ data = P_ENTRY(dbp, h, indx);
+ len = HBLOB_SIZE;
+ break;
+ }
+ memcpy(&hblob, hk, HBLOB_SIZE);
+ blob_id = (db_seq_t)hblob.id;
+ GET_BLOB_SIZE(env, hblob, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ return (__blob_get(
+ dbc, dbt, blob_id, blob_size, memp, memsize));
}
len = LEN_HKEYDATA(dbp, h, dbp->pgsize, indx);
data = HKEYDATA_DATA(hk);
@@ -58,6 +81,21 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
hdr = (HEAPHDR *)P_ENTRY(dbp, h, indx);
if (F_ISSET(hdr,(HEAP_RECSPLIT | HEAP_RECFIRST)))
return (__heapc_gsplit(dbc, dbt, memp, memsize));
+ else if (F_ISSET(hdr, HEAP_RECBLOB)) {
+ /* Get the record instead of the blob item. */
+ if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+ data = P_ENTRY(dbp, h, indx);
+ len = HEAPBLOBREC_SIZE;
+ break;
+ }
+ memcpy(&bhdr, hdr, HEAPBLOBREC_SIZE);
+ blob_id = (db_seq_t)bhdr.id;
+ GET_BLOB_SIZE(env, bhdr, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ return (__blob_get(
+ dbc, dbt, blob_id, blob_size, memp, memsize));
+ }
len = hdr->size;
data = (u_int8_t *)hdr + sizeof(HEAPHDR);
break;
@@ -69,6 +107,20 @@ __db_ret(dbc, h, indx, dbt, memp, memsize)
bo = (BOVERFLOW *)bk;
return (__db_goff(dbc, dbt,
bo->tlen, bo->pgno, memp, memsize));
+ } else if (B_TYPE(bk->type) == B_BLOB) {
+ /* Get the record instead of the blob item. */
+ if (F_ISSET(dbt, DB_DBT_BLOB_REC)) {
+ data = P_ENTRY(dbp, h, indx);
+ len = BBLOB_SIZE;
+ break;
+ }
+ memcpy(&bl, bk, BBLOB_SIZE);
+ blob_id = (db_seq_t)bl.id;
+ GET_BLOB_SIZE(env, bl, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ return (__blob_get(
+ dbc, dbt, blob_id, blob_size, memp, memsize));
}
len = bk->len;
data = bk->data;
@@ -167,3 +219,71 @@ __db_retcopy(env, dbt, data, len, memp, memsize)
return (ret);
}
+
+/*
+ * __db_dbt_clone --
+ *	Clone a DBT from another DBT.
+ *
+ *	The input dest DBT must be a zero initialized DBT that will be
+ *	populated. The function does not allocate a dest DBT to allow for
+ *	cloning into stack or locally allocated variables. It is the caller's
+ *	responsibility to free the memory allocated in dest->data.
+ *
+ *	Returns 0 on success, EINVAL if src carries one of the unsupported
+ *	flags, or the error returned by __os_malloc.
+ *
+ * PUBLIC: int __db_dbt_clone __P((ENV *, DBT *, const DBT *));
+ */
+int
+__db_dbt_clone(env, dest, src)
+	ENV *env;
+	DBT *dest;
+	const DBT *src;
+{
+	u_int32_t err_flags;
+	int ret;
+
+	DB_ASSERT(env, dest->data == NULL);
+
+	/* The function does not support the following DBT flags. */
+	err_flags = DB_DBT_MALLOC | DB_DBT_REALLOC |
+	    DB_DBT_MULTIPLE | DB_DBT_PARTIAL;
+	if (F_ISSET(src, err_flags)) {
+		__db_errx(env, DB_STR("0758",
+		    "Unsupported flags when cloning the DBT."));
+		return (EINVAL);
+	}
+
+	if ((ret = __os_malloc(env, src->size, &dest->data)) != 0)
+		return (ret);
+
+	/*
+	 * An empty source DBT may carry a NULL data pointer, and handing a
+	 * NULL pointer to memcpy is undefined even for a zero length.
+	 */
+	if (src->size != 0)
+		memcpy(dest->data, src->data, src->size);
+	dest->ulen = src->size;
+	dest->size = src->size;
+	dest->flags = DB_DBT_USERMEM;
+
+	return (0);
+}
+
+/*
+ * __db_dbt_clone_free --
+ *	Free a DBT cloned by __db_dbt_clone.
+ *
+ *	Releases dbt->data and leaves the DBT empty (data NULL, size and
+ *	ulen 0). The DBT structure itself is not freed. Returns EINVAL,
+ *	without freeing anything, if the flags are not exactly
+ *	DB_DBT_USERMEM; otherwise returns 0.
+ *
+ * PUBLIC: int __db_dbt_clone_free __P((ENV *, DBT *));
+ */
+int
+__db_dbt_clone_free(env, dbt)
+	ENV *env;
+	DBT *dbt;
+{
+	/* Currently only DB_DBT_USERMEM is supported. */
+	if (dbt->flags != DB_DBT_USERMEM) {
+		__db_errx(env, DB_STR("0759",
+		    "Unsupported flags when freeing the cloned DBT."));
+		return (EINVAL);
+	}
+
+	if (dbt->data != NULL) {
+		__os_free(env, dbt->data);
+		/*
+		 * Do not leave a dangling pointer behind: code that walks
+		 * key arrays tests data against NULL before freeing, and
+		 * __db_dbt_clone asserts that its destination is NULL.
+		 */
+		dbt->data = NULL;
+	}
+	dbt->size = dbt->ulen = 0;
+
+	return (0);
+}
diff --git a/src/db/db_setid.c b/src/db/db_setid.c
index 697c3ff7..5c61a139 100644
--- a/src/db/db_setid.c
+++ b/src/db/db_setid.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_setlsn.c b/src/db/db_setlsn.c
index 1a3280ed..acee80f6 100644
--- a/src/db/db_setlsn.c
+++ b/src/db/db_setlsn.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_sort_multiple.c b/src/db/db_sort_multiple.c
index c5e2e941..7facb80e 100644
--- a/src/db/db_sort_multiple.c
+++ b/src/db/db_sort_multiple.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
#include "db_config.h"
@@ -34,7 +34,7 @@ __db_compare_both(db, akey, adata, bkey, bdata)
t = (BTREE *)db->bt_internal;
- cmp = t->bt_compare(db, akey, bkey);
+ cmp = t->bt_compare(db, akey, bkey, NULL);
if (cmp != 0) return cmp;
if (!F_ISSET(db, DB_AM_DUPSORT))
return (0);
@@ -44,9 +44,9 @@ __db_compare_both(db, akey, adata, bkey, bdata)
#ifdef HAVE_COMPRESSION
if (DB_IS_COMPRESSED(db))
- return t->compress_dup_compare(db, adata, bdata);
+ return t->compress_dup_compare(db, adata, bdata, NULL);
#endif
- return db->dup_compare(db, adata, bdata);
+ return db->dup_compare(db, adata, bdata, NULL);
}
#define DB_SORT_SWAP(a, ad, b, bd) \
diff --git a/src/db/db_stati.c b/src/db/db_stati.c
index 61744e81..b7367f37 100644
--- a/src/db/db_stati.c
+++ b/src/db/db_stati.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_truncate.c b/src/db/db_truncate.c
index 0eeb0c64..d57a23b2 100644
--- a/src/db/db_truncate.c
+++ b/src/db/db_truncate.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -191,6 +191,10 @@ __db_truncate(dbp, ip, txn, countp)
if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
ret = t_ret;
+ /* Delete all blob files. */
+ if (ret == 0)
+ ret = __blob_del_all(dbp, txn, 1);
+
DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, NULL);
DB_TEST_RECOVERY_LABEL
diff --git a/src/db/db_upg.c b/src/db/db_upg.c
index de5d0dc7..7dcc3b1c 100644
--- a/src/db/db_upg.c
+++ b/src/db/db_upg.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -13,6 +13,7 @@
#include "dbinc/db_swap.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
+#include "dbinc/heap.h"
#include "dbinc/qam.h"
/*
@@ -98,6 +99,27 @@ static int (* const func_46_list[P_PAGETYPE_MAX])
NULL, /* P_IHEAP */
};
+/*
+ * func_60_list --
+ *	Page conversion routines handed to __db_page_pass for the upgrade
+ *	that turns the blob ids and sizes stored as pairs of u_int32_t
+ *	values in early 6.0 into 64 bit integers. There is one slot per
+ *	page type, in the order given by the trailing comments; a NULL slot
+ *	supplies no routine for that page type.
+ */
+static int (* const func_60_list[P_PAGETYPE_MAX])
+ __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
+ NULL, /* P_INVALID */
+ NULL, /* __P_DUPLICATE */
+ NULL, /* P_HASH_UNSORTED */
+ NULL, /* P_IBTREE */
+ NULL, /* P_IRECNO */
+ __bam_60_lbtree, /* P_LBTREE */
+ NULL, /* P_LRECNO */
+ NULL, /* P_OVERFLOW */
+ __ham_60_hashmeta, /* P_HASHMETA */
+ __bam_60_btreemeta, /* P_BTREEMETA */
+ NULL, /* P_QAMMETA */
+ NULL, /* P_QAMDATA */
+ NULL, /* P_LDUP */
+ __ham_60_hash, /* P_HASH */
+ __heap_60_heapmeta, /* P_HEAPMETA */
+ __heap_60_heap, /* P_HEAP */
+ NULL, /* P_IHEAP */
+};
+
static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const [])
(DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *));
static int __db_set_lastpgno __P((DB *, char *, DB_FH *));
@@ -181,6 +203,34 @@ __db_upgrade(dbp, fname, flags)
goto err;
/* FALLTHROUGH */
case 9:
+ /*
+ * Various blob ids and sizes used two u_int32_t values
+ * to represent 64 bit integers in early 6.0. Change
+ * those values to 64 bit integers.
+ */
+ /*
+ * Read the encrypt_alg and chksum fields from the
+ * metadata page.
+ */
+ meta = (DBMETA *)mbuf;
+ if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+ F_SET(dbp, DB_AM_CHKSUM);
+ if (meta->encrypt_alg != 0) {
+ if (!CRYPTO_ON(dbp->env)) {
+ __db_errx(env, DB_STR("0777",
+"Attempt to upgrade an encrypted database without providing a password."));
+ ret = EINVAL;
+ goto err;
+ }
+ F_SET(dbp, DB_AM_ENCRYPT);
+ }
+ memcpy(&dbp->pgsize,
+ &meta->pagesize, sizeof(u_int32_t));
+ if ((ret = __db_page_pass(dbp,
+ real_name, flags, func_60_list, fhp)) != 0)
+ goto err;
+ /* FALLTHROUGH */
+ case 10:
break;
default:
__db_errx(env, DB_STR_A("0666",
@@ -307,6 +357,34 @@ __db_upgrade(dbp, fname, flags)
/* FALLTHROUGH */
case 9:
+ /*
+ * Various blob ids and sizes used two u_int32_t values
+ * to represent 64 bit integers in early 6.0. Change
+ * those values to 64 bit integers.
+ */
+ meta = (DBMETA*)mbuf;
+ memcpy(&dbp->pgsize,
+ &meta->pagesize, sizeof(u_int32_t));
+ /*
+ * Read the encrypt_alg and chksum fields from the
+ * metadata page.
+ */
+ if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+ F_SET(dbp, DB_AM_CHKSUM);
+ if (meta->encrypt_alg != 0) {
+ if (!CRYPTO_ON(dbp->env)) {
+ __db_errx(env, DB_STR("0778",
+"Attempt to upgrade an encrypted database without providing a password."));
+ ret = EINVAL;
+ goto err;
+ }
+ F_SET(dbp, DB_AM_ENCRYPT);
+ }
+ if ((ret = __db_page_pass(dbp,
+ real_name, flags, func_60_list, fhp)) != 0)
+ goto err;
+ /* FALLTHROUGH */
+ case 10:
break;
default:
__db_errx(env, DB_STR_A("0668",
@@ -317,9 +395,45 @@ __db_upgrade(dbp, fname, flags)
}
break;
case DB_HEAPMAGIC:
- /*
- * There's no upgrade needed for Heap yet.
- */
+ switch (((DBMETA *)mbuf)->version) {
+ case 1:
+ /*
+ * Various blob ids and sizes used two u_int32_t values
+ * to represent 64 bit integers in early 6.0. Change
+ * those values to 64 bit integers.
+ */
+ meta = (DBMETA*)mbuf;
+ memcpy(&dbp->pgsize,
+ &meta->pagesize, sizeof(u_int32_t));
+ /*
+ * Read the encrypt_alg and chksum fields from the
+ * metadata page.
+ */
+ if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
+ F_SET(dbp, DB_AM_CHKSUM);
+ if (meta->encrypt_alg != 0) {
+ if (!CRYPTO_ON(dbp->env)) {
+ __db_errx(env, DB_STR("0779",
+"Attempt to upgrade an encrypted database without providing a password."));
+ ret = EINVAL;
+ goto err;
+ }
+ F_SET(dbp, DB_AM_ENCRYPT);
+ }
+ if ((ret = __db_page_pass(dbp,
+ real_name, flags, func_60_list, fhp)) != 0)
+ goto err;
+ /* FALLTHROUGH */
+ case 2:
+ break;
+ default:
+ __db_errx(env, DB_STR_A("0776",
+ "%s: unsupported heap version: %lu",
+ "%s %lu"), real_name,
+ (u_long)((DBMETA *)mbuf)->version);
+ ret = DB_OLD_VERSION;
+ goto err;
+ }
break;
case DB_QAMMAGIC:
switch (((DBMETA *)mbuf)->version) {
diff --git a/src/db/db_upg_opd.c b/src/db/db_upg_opd.c
index 992115ad..6f6dfb71 100644
--- a/src/db/db_upg_opd.c
+++ b/src/db/db_upg_opd.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -37,6 +37,9 @@ static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t));
* __db_31_offdup --
* Convert 3.0 off-page duplicates to 3.1 off-page duplicates.
*
+ * This code and its descendants should be removed when support for
+ * upgrading from a 3.0 database format is removed.
+ *
* PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *));
*/
int
@@ -317,7 +320,7 @@ __db_build_ri(dbp, fhp, ipage, page, indx, nomemp)
/*
* __db_up_ovref --
- * Increment/decrement the reference count on an overflow page.
+ * Increment the reference count on an overflow page.
*/
static int
__db_up_ovref(dbp, fhp, pgno)
diff --git a/src/db/db_vrfy.c b/src/db/db_vrfy.c
index 9cb94ad2..a8c80cae 100644
--- a/src/db/db_vrfy.c
+++ b/src/db/db_vrfy.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -553,7 +553,7 @@ __db_vrfy_pagezero(dbp, vdp, fhp, name, flags)
if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
return (ret);
- if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
+ if ((ret = __db_chk_meta(env, dbp, meta, DB_CHK_META)) != 0) {
EPRINT((env, DB_STR_A("0522",
"Page %lu: metadata page corrupted", "%lu"),
(u_long)PGNO_BASE_MD));
@@ -920,7 +920,7 @@ err1: if (ret == 0)
* If we've seen a Queue metadata page, we may need to walk Queue
* extent pages that won't show up between 0 and vdp->last_pgno.
*/
- if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
+ if (F_ISSET(vdp, SALVAGE_QMETA_SET) && (t_ret =
__qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
if (ret == 0)
ret = t_ret;
@@ -1563,6 +1563,10 @@ __db_vrfy_meta(dbp, vdp, meta, pgno, flags)
* If we don't have FTRUNCATE then mpool could include some
* zeroed pages at the end of the file, we assume the meta page
* is correct. Queue does not update the meta page's last_pgno.
+ *
+ * We have seen one false positive after a failure while rolling the log
+ * forward, last_pgno was updated and the file had not yet been
+ * extended. [#18418]
*/
if (pgno == PGNO_BASE_MD &&
dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
@@ -2401,6 +2405,15 @@ __db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
* length, so it's not possible to certify it as safe.
*/
switch (B_TYPE(bk->type)) {
+ case B_BLOB:
+ len = bk->len;
+ if (len != BBLOB_DSIZE) {
+ EPRINT((env, DB_STR_A("0771",
+ "Page %lu: item %lu illegal size.",
+ "%lu %lu"), (u_long)pgno, (u_long)i));
+ return (DB_VERIFY_BAD);
+ }
+ break;
case B_KEYDATA:
len = bk->len;
break;
diff --git a/src/db/db_vrfy_stub.c b/src/db/db_vrfy_stub.c
index 5037f33e..a9eed84c 100644
--- a/src/db/db_vrfy_stub.c
+++ b/src/db/db_vrfy_stub.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/db/db_vrfyutil.c b/src/db/db_vrfyutil.c
index d72e1188..3a64bd50 100644
--- a/src/db/db_vrfyutil.c
+++ b/src/db/db_vrfyutil.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -43,6 +43,9 @@ __db_vrfy_dbinfo_create(env, ip, pgsize, vdpp)
if ((ret = __db_create_internal(&cdbp, env, 0)) != 0)
goto err;
+ if ((ret = __db_set_blob_threshold(cdbp, 0, 0)) != 0)
+ goto err;
+
if ((ret = __db_set_flags(cdbp, DB_DUP)) != 0)
goto err;
@@ -60,6 +63,9 @@ __db_vrfy_dbinfo_create(env, ip, pgsize, vdpp)
if ((ret = __db_create_internal(&pgdbp, env, 0)) != 0)
goto err;
+ if ((ret = __db_set_blob_threshold(pgdbp, 0, 0)) != 0)
+ goto err;
+
if ((ret = __db_set_pagesize(pgdbp, pgsize)) != 0)
goto err;
@@ -928,5 +934,6 @@ __db_vrfy_prdbt(dbtp, checkprint, prefix,
}
return (
__db_prdbt(dbtp, checkprint,
- prefix, handle, callback, is_recno, is_heap));
+ prefix, handle, callback, is_recno, is_heap,
+ vdp != NULL && F_ISSET(vdp, SALVAGE_STREAM_BLOB) ? 1 : 0));
}
diff --git a/src/db/partition.c b/src/db/partition.c
index f8beaf16..86491ba3 100644
--- a/src/db/partition.c
+++ b/src/db/partition.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -32,13 +32,12 @@ static int __partc_writelock __P((DBC*));
static int __partition_chk_meta __P((DB *,
DB_THREAD_INFO *, DB_TXN *, u_int32_t));
static int __partition_setup_keys __P((DBC *,
- DB_PARTITION *, DBMETA *, u_int32_t));
+ DB_PARTITION *, u_int32_t, u_int32_t));
static int __part_key_cmp __P((const void *, const void *));
static inline void __part_search __P((DB *,
DB_PARTITION *, DBT *, u_int32_t *));
-static char *Alloc_err = DB_STR_A("0644",
- "Partition open failed to allocate %d bytes", "%d");
+#define ALLOC_ERR DB_STR_A("0764","Partition failed to allocate %d bytes","%d")
/*
* Allocate a partition cursor and copy flags to the partition cursor.
@@ -70,20 +69,27 @@ static inline void __part_search(dbp, part, key, part_idp)
{
db_indx_t base, indx, limit;
int cmp;
- int (*func) __P((DB *, const DBT *, const DBT *));
+ int (*func) __P((DB *, const DBT *, const DBT *, size_t *));
+ size_t pos, pos_h, pos_l;
DB_ASSERT(dbp->env, part->nparts != 0);
COMPQUIET(cmp, 0);
COMPQUIET(indx, 0);
+ pos_h = 0;
+ pos_l = 0;
func = ((BTREE *)dbp->bt_internal)->bt_compare;
DB_BINARY_SEARCH_FOR(base, limit, part->nparts, O_INDX) {
+ pos = pos_l > pos_h ? pos_h : pos_l;
DB_BINARY_SEARCH_INCR(indx, base, limit, O_INDX);
- cmp = func(dbp, key, &part->keys[indx]);
+ cmp = func(dbp, key, &part->keys[indx], &pos);
if (cmp == 0)
break;
- if (cmp > 0)
+ if (cmp > 0) {
DB_BINARY_SEARCH_SHIFT_BASE(indx, base, limit, O_INDX);
+ pos_l = pos;
+ } else
+ pos_h = pos;
}
if (cmp == 0)
*part_idp = indx;
@@ -146,7 +152,8 @@ __partition_set(dbp, parts, keys, callback)
{
DB_PARTITION *part;
ENV *env;
- int ret;
+ u_int32_t i;
+ int ret, t_ret;
DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition");
env = dbp->dbenv->env;
@@ -155,6 +162,11 @@ __partition_set(dbp, parts, keys, callback)
__db_errx(env, DB_STR("0646",
"Must specify at least 2 partitions."));
return (EINVAL);
+ } else if (parts > PART_MAXIMUM) {
+ __db_errx(env, DB_STR_A("0772",
+ "Must not specify more than %u partitions.", "%u"),
+ (unsigned int)PART_MAXIMUM);
+ return (EINVAL);
}
if (keys == NULL && callback == NULL) {
@@ -178,11 +190,59 @@ bad: __db_errx(env, DB_STR("0648",
(part->callback != NULL && keys != NULL))
goto bad;
+ /*
+ * Free a key array that was allocated by an earlier set_partition call.
+ */
+ if (part->keys != NULL) {
+ for (i = 0; i < part->nparts - 1; i++) {
+ /*
+ * Always free all entries in the key array and return
+ * the first error code.
+ */
+ if ((t_ret = __db_dbt_clone_free(dbp->env,
+ &part->keys[i])) != 0 && ret == 0)
+ ret = t_ret;
+ }
+ __os_free(dbp->env, part->keys);
+ part->keys = NULL;
+ }
+
+ if (ret != 0)
+ return (ret);
+
part->nparts = parts;
- part->keys = keys;
part->callback = callback;
- return (0);
+ /*
+ * Take a copy of the user's key array; otherwise we cannot be sure
+ * that the memory will still be valid when the database is opened.
+ */
+ if (keys != NULL) {
+ if ((ret = __os_calloc(dbp->env,
+ part->nparts - 1, sizeof(DBT), &part->keys)) != 0)
+ goto err;
+
+ for (i = 0, parts = 0; i < part->nparts - 1; i++, parts++)
+ if ((ret = __db_dbt_clone(dbp->env,
+ &part->keys[i], &keys[i])) != 0)
+ goto err;
+ }
+
+err: if (ret != 0 && part->keys != NULL) {
+ /*
+ * Always free the entries that were cloned successfully, as
+ * well as the one on which __db_dbt_clone failed, and
+ * return the first error code. Since ret != 0 here, it is
+ * safe to ignore any error from __db_dbt_clone_free.
+ */
+ for (i = 0; i < parts; i++)
+ (void)__db_dbt_clone_free(dbp->env, &part->keys[i]);
+ if (parts < part->nparts - 1 && part->keys[parts].data != NULL)
+ __os_free(dbp->env, part->keys[parts].data);
+ __os_free(dbp->env, part->keys);
+ part->keys = NULL;
+ }
+ return (ret);
}
/*
@@ -288,15 +348,16 @@ __partition_open(dbp, ip, txn, fname, type, flags, mode, do_open)
if ((ret = __os_calloc(env,
part->nparts, sizeof(*part->handles), &part->handles)) != 0) {
- __db_errx(env,
- Alloc_err, part->nparts * sizeof(*part->handles));
+ __db_errx(env, ALLOC_ERR,
+ (int)(part->nparts * sizeof(*part->handles)));
goto err;
}
DB_ASSERT(env, fname != NULL);
if ((ret = __os_malloc(env,
strlen(fname) + PART_LEN + 1, &name)) != 0) {
- __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
+ __db_errx(env, ALLOC_ERR,
+ (int)(strlen(fname) + PART_LEN + 1));
goto err;
}
@@ -330,6 +391,9 @@ __partition_open(dbp, ip, txn, fname, type, flags, mode, do_open)
part_db->dup_compare = dbp->dup_compare;
part_db->app_private = dbp->app_private;
part_db->api_internal = dbp->api_internal;
+ part_db->blob_threshold = dbp->blob_threshold;
+ part_db->blob_file_id = dbp->blob_file_id;
+ part_db->blob_sdb_id = dbp->blob_sdb_id;
if (dbp->type == DB_BTREE)
__bam_copy_config(dbp, part_db, part->nparts);
@@ -388,7 +452,8 @@ __partition_chk_meta(dbp, ip, txn, flags)
DB_MPOOLFILE *mpf;
ENV *env;
db_pgno_t base_pgno;
- int ret, t_ret;
+ int ret, set_keys, t_ret;
+ u_int32_t pgsize;
dbc = NULL;
meta = NULL;
@@ -397,6 +462,14 @@ __partition_chk_meta(dbp, ip, txn, flags)
mpf = dbp->mpf;
env = dbp->env;
ret = 0;
+ set_keys = 0;
+
+ /*
+ * Just to fix the lint warning.
+ * The real value will be set later, and we will
+ * only use the value after being set properly.
+ */
+ pgsize = dbp->pgsize;
/* Get a cursor on the main db. */
dbp->p_internal = NULL;
@@ -475,10 +548,12 @@ __partition_chk_meta(dbp, ip, txn, flags)
}
} else if (meta->magic != DB_BTREEMAGIC) {
__db_errx(env, DB_STR("0658",
- "Partitioning only supported on BTREE nad HASH."));
+ "Partitioning only supported on BTREE and HASH."));
ret = EINVAL;
- } else
- ret = __partition_setup_keys(dbc, part, meta, flags);
+ } else {
+ set_keys = 1;
+ pgsize = meta->pagesize;
+ }
err: /* Put the metadata page back. */
if (meta != NULL && (t_ret = __memp_fput(mpf,
@@ -487,6 +562,15 @@ err: /* Put the metadata page back. */
if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
ret = t_ret;
+ /*
+ * We can only call __partition_setup_keys after putting
+ * the meta page and releasing the meta lock, or self-deadlock
+ * will occur.
+ */
+ if (ret == 0 && set_keys && (t_ret =
+ __partition_setup_keys(dbc, part, pgsize, flags)) != 0)
+ ret = t_ret;
+
if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
ret = t_ret;
@@ -502,7 +586,7 @@ err: /* Put the metadata page back. */
struct key_sort {
DB *dbp;
DBT *key;
- int (*compare) __P((DB *, const DBT *, const DBT *));
+ int (*compare) __P((DB *, const DBT *, const DBT *, size_t *));
};
static int __part_key_cmp(a, b)
@@ -512,7 +596,7 @@ static int __part_key_cmp(a, b)
ka = a;
kb = b;
- return (ka->compare(ka->dbp, ka->key, kb->key));
+ return (ka->compare(ka->dbp, ka->key, kb->key, NULL));
}
/*
* __partition_setup_keys --
@@ -520,25 +604,22 @@ static int __part_key_cmp(a, b)
* are creating a partitioned database.
*/
static int
-__partition_setup_keys(dbc, part, meta, flags)
+__partition_setup_keys(dbc, part, pgsize, flags)
DBC *dbc;
DB_PARTITION *part;
- DBMETA *meta;
- u_int32_t flags;
+ u_int32_t flags, pgsize;
{
BTREE *t;
DB *dbp;
- DBT data, key, *keys, *kp;
+ DBT data, key, *keys, *kp, *okp;
ENV *env;
- u_int32_t ds, i, j;
- u_int8_t *dd;
+ db_pgno_t last_pgno;
+ u_int32_t cgetflags, i, j;
+ size_t dsize;
struct key_sort *ks;
- int have_keys, ret;
- int (*compare) __P((DB *, const DBT *, const DBT *));
- void *dp;
+ int have_keys, ret, t_ret;
+ int (*compare) __P((DB *, const DBT *, const DBT *, size_t *));
- COMPQUIET(dd, NULL);
- COMPQUIET(ds, 0);
memset(&data, 0, sizeof(data));
memset(&key, 0, sizeof(key));
ks = NULL;
@@ -549,6 +630,9 @@ __partition_setup_keys(dbc, part, meta, flags)
/* Need to just read the main database. */
dbp->p_internal = NULL;
have_keys = 0;
+ dsize = 0;
+
+ keys = part->keys;
/* First verify that things what we expect. */
if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) {
@@ -581,11 +665,15 @@ __partition_setup_keys(dbc, part, meta, flags)
}
if (LF_ISSET(DB_CREATE) && have_keys == 0) {
- /* Insert the keys into the master database. */
+ /*
+ * Insert the keys into the master database. We will also
+ * compute the total size of the keys for later use.
+ */
for (i = 0; i < part->nparts - 1; i++) {
if ((ret = __db_put(dbp, dbc->thread_info,
dbc->txn, &part->keys[i], &data, 0)) != 0)
goto err;
+ dsize += part->keys[i].size;
}
/*
@@ -604,39 +692,71 @@ __partition_setup_keys(dbc, part, meta, flags)
}
done: if (F_ISSET(part, PART_RANGE)) {
/*
- * Allocate one page to hold the keys plus space at the
- * end of the buffer to put an array of DBTs. If there
- * is not enough space __dbc_get will return how much
- * is needed and we realloc.
+ * If we just did the insert, we already know the total size of
+ * the keys. Otherwise, the keys must already be in the database,
+ * and we can bound their size by checking the last pgno of
+ * the corresponding mpoolfile.
+ *
+ * We make the size aligned at 1024 for performance.
*/
+ if (dsize == 0) {
+ ret = __memp_get_last_pgno(dbp->mpf, &last_pgno);
+ if (ret != 0)
+ goto err;
+ if (last_pgno > 1)
+ last_pgno--;
+ dsize = last_pgno * pgsize;
+ }
+ dsize = DB_ALIGN(dsize, 1024);
+
if ((ret = __os_malloc(env,
- meta->pagesize + (sizeof(DBT) * part->nparts),
+ dsize + (sizeof(DBT) * part->nparts),
&part->data)) != 0) {
- __db_errx(env, Alloc_err, meta->pagesize);
+ __db_errx(env, ALLOC_ERR, (int)dsize);
goto err;
}
+ memset(part->data, 0,
+ dsize + (sizeof(DBT) * part->nparts));
+
+ kp = okp = (DBT *)
+ ((u_int8_t *)part->data + dsize);
memset(&key, 0, sizeof(key));
memset(&data, 0, sizeof(data));
- data.data = part->data;
- data.ulen = meta->pagesize;
data.flags = DB_DBT_USERMEM;
-again: if ((ret = __dbc_get(dbc, &key, &data,
- DB_FIRST | DB_MULTIPLE_KEY)) == DB_BUFFER_SMALL) {
- if ((ret = __os_realloc(env,
- data.size + (sizeof(DBT) * part->nparts),
- &part->data)) != 0)
+ j = 0;
+ cgetflags = DB_FIRST;
+ while ((ret = __dbc_get(dbc, &key, &data, cgetflags)) == 0) {
+ /*
+ * It is an error if we get more keys than expected. The
+ * DBT array starting at okp has exactly part->nparts
+ * slots, so this must be caught before slot part->nparts
+ * is written below.
+ */
+ if ((u_int32_t)(kp - okp) >= part->nparts) {
+ ret = EINVAL;
goto err;
- data.data = part->data;
- data.ulen = data.size;
- goto again;
+ }
+ kp->size = key.size;
+ kp->data = (u_int8_t *)part->data + j;
+ /* It is an error if the keys overflow the space. */
+ if (j + kp->size > dsize) {
+ ret = EINVAL;
+ goto err;
+ }
+ memcpy(kp->data, key.data, kp->size);
+ j += kp->size;
+ cgetflags = DB_NEXT;
+ kp++;
}
+
+ /*
+ * We should get part->nparts keys back, otherwise it means
+ * the passed-in keys are not valid.
+ */
+ if (ret == DB_NOTFOUND && (u_int32_t)(kp - okp) == part->nparts)
+ ret = 0;
+
if (ret == 0) {
/*
* They passed in keys, they must match.
*/
- keys = NULL;
compare = NULL;
- if (have_keys == 1 && (keys = part->keys) != NULL) {
+ if (have_keys == 1 && keys != NULL) {
t = dbc->dbp->bt_internal;
compare = t->bt_compare;
if ((ret = __os_malloc(env, (part->nparts - 1)
@@ -651,20 +771,15 @@ again: if ((ret = __dbc_get(dbc, &key, &data,
qsort(ks, (size_t)part->nparts - 1,
sizeof(struct key_sort), __part_key_cmp);
}
- DB_MULTIPLE_INIT(dp, &data);
part->keys = (DBT *)
- ((u_int8_t *)part->data + data.size);
+ ((u_int8_t *)part->data + dsize);
+ F_SET(part, PART_KEYS_SETUP);
j = 0;
for (kp = part->keys;
kp < &part->keys[part->nparts]; kp++, j++) {
- DB_MULTIPLE_KEY_NEXT(dp,
- &data, kp->data, kp->size, dd, ds);
- if (dp == NULL) {
- ret = DB_NOTFOUND;
- break;
- }
- if (keys != NULL && j != 0 &&
- compare(dbc->dbp, ks[j - 1].key, kp) != 0) {
+ if (have_keys == 1 && keys != NULL && j != 0 &&
+ compare(dbc->dbp, ks[j - 1].key,
+ kp, NULL) != 0) {
if (kp->data == NULL &&
F_ISSET(dbp, DB_AM_RECOVER))
goto err;
@@ -683,6 +798,24 @@ again: if ((ret = __dbc_get(dbc, &key, &data,
err: dbp->p_internal = part;
if (ks != NULL)
__os_free(env, ks);
+
+ /*
+ * We only free the original copy of the key array when
+ * the keys have been set up properly; otherwise we leave
+ * it to the close function to free the memory.
+ */
+ if (keys != NULL && F_ISSET(part, PART_KEYS_SETUP)) {
+ for (i = 0; i < part->nparts - 1; i++)
+ /*
+ * Always free all entries in the key array and return
+ * the first error code.
+ */
+ if ((t_ret = __db_dbt_clone_free(env,
+ &keys[i])) != 0 && ret == 0)
+ ret = t_ret;
+ __os_free(env, keys);
+ }
+
return (ret);
}
@@ -1183,6 +1316,15 @@ __partition_close(dbp, txn, flags)
ret = t_ret;
__os_free(env, part->handles);
}
+ if (!F_ISSET(part, PART_KEYS_SETUP) && part->keys != NULL) {
+ for (i = 0; i < part->nparts - 1; i++) {
+ if (part->keys[i].data != NULL && (t_ret =
+ __db_dbt_clone_free(env, &part->keys[i])) != 0 &&
+ ret == 0)
+ ret = t_ret;
+ }
+ __os_free(env, part->keys);
+ }
if (part->dirs != NULL)
__os_free(env, (char **)part->dirs);
if (part->data != NULL)
@@ -1471,7 +1613,8 @@ __part_fileid_reset(env, ip, fname, nparts, encrypted)
if ((ret = __os_malloc(env,
strlen(fname) + PART_LEN + 1, &name)) != 0) {
- __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
+ __db_errx(env, ALLOC_ERR,
+ (int)(strlen(fname) + PART_LEN + 1));
return (ret);
}
@@ -1747,7 +1890,8 @@ __part_rr(dbp, ip, txn, name, subdb, newname, flags)
COMPQUIET(np, NULL);
if (newname != NULL && (ret = __os_malloc(env,
strlen(newname) + PART_LEN + 1, &np)) != 0) {
- __db_errx(env, Alloc_err, strlen(newname) + PART_LEN + 1);
+ __db_errx(env, ALLOC_ERR,
+ (int)(strlen(newname) + PART_LEN + 1));
goto err;
}
for (i = 0; i < part->nparts; i++, pdbp++) {
@@ -1790,6 +1934,32 @@ err: /*
}
return (ret);
}
+
+/*
+ * __partc_dup --
+ *	Duplicate a cursor on a partitioned database.
+ *
+ *	dbc_orig is the cursor being copied and dbc_n the cursor being
+ *	filled in; both must already have their internal PART_CURSOR.
+ *	Returns whatever __dbc_dup returns for the underlying sub-cursor.
+ *
+ * PUBLIC: int __partc_dup __P((DBC *, DBC *));
+ */
+int
+__partc_dup(dbc_orig, dbc_n)
+	DBC *dbc_orig;
+	DBC *dbc_n;
+{
+	PART_CURSOR *from_cp, *to_cp;
+	int ret;
+
+	from_cp = (PART_CURSOR *)dbc_orig->internal;
+	to_cp = (PART_CURSOR *)dbc_n->internal;
+
+	/*
+	 * A partition cursor is made of two pieces: the identifier of the
+	 * underlying database, and a regular cursor on that database.
+	 * The identifier is a plain copy.
+	 */
+	to_cp->part_id = from_cp->part_id;
+
+	/* The regular cursor is duplicated, passing DB_POSITION. */
+	ret = __dbc_dup(from_cp->sub_cursor, &to_cp->sub_cursor, DB_POSITION);
+	return (ret);
+}
#ifdef HAVE_VERIFY
/*
* __part_verify --