summary | refs | log | tree | commit | diff
path: root/src/env
diff options
context:
space:
mode:
author: Lorry Tar Creator <lorry-tar-importer@baserock.org> 2015-02-17 17:25:57 +0000
committer: <> 2015-03-17 16:26:24 +0000
commit: 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch)
tree: 598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/env
parent: 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff)
download: berkeleydb-master.tar.gz
Imported from /home/lorry/working-area/delta_berkeleydb/db-6.1.23.tar.gz. (refs: HEAD, db-6.1.23, master)
Diffstat (limited to 'src/env')
-rw-r--r-- src/env/env_alloc.c 2
-rw-r--r-- src/env/env_backup.c 2
-rw-r--r-- src/env/env_config.c 38
-rw-r--r-- src/env/env_failchk.c 141
-rw-r--r-- src/env/env_file.c 16
-rw-r--r-- src/env/env_globals.c 19
-rw-r--r-- src/env/env_method.c 187
-rw-r--r-- src/env/env_name.c 20
-rw-r--r-- src/env/env_open.c 250
-rw-r--r-- src/env/env_recover.c 130
-rw-r--r-- src/env/env_region.c 117
-rw-r--r-- src/env/env_register.c 197
-rw-r--r-- src/env/env_sig.c 17
-rw-r--r-- src/env/env_stat.c 43
14 files changed, 892 insertions, 287 deletions
diff --git a/src/env/env_alloc.c b/src/env/env_alloc.c
index 700bfb27..9c8fd046 100644
--- a/src/env/env_alloc.c
+++ b/src/env/env_alloc.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/env/env_backup.c b/src/env/env_backup.c
index 9c79dbb4..2940f44b 100644
--- a/src/env/env_backup.c
+++ b/src/env/env_backup.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
diff --git a/src/env/env_config.c b/src/env/env_config.c
index 57496909..56cebb63 100644
--- a/src/env/env_config.c
+++ b/src/env/env_config.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -84,8 +84,10 @@ static const CFG_DESC config_descs[] = {
{ "rep_set_clockskew", CFG_2UINT, __rep_set_clockskew },
{ "rep_set_limit", CFG_2UINT, __rep_set_limit },
{ "rep_set_nsites", CFG_UINT, __rep_set_nsites_pp },
- { "rep_set_priority", CFG_UINT, __rep_set_priority },
+ { "rep_set_priority", CFG_UINT, __rep_set_priority_pp },
{ "rep_set_request", CFG_2UINT, __rep_set_request },
+ { "set_blob_dir", CFG_STRING, __env_set_blob_dir },
+ { "set_blob_threshold", CFG_2UINT, __env_set_blob_threshold },
{ "set_cache_max", CFG_2UINT, __memp_set_cache_max },
{ "set_create_dir", CFG_STRING, __env_set_create_dir },
{ "set_data_dir", CFG_STRING, __env_set_data_dir },
@@ -133,11 +135,16 @@ static const FN config_rep_config[] = {
{ DB_REP_CONF_AUTOROLLBACK, "db_rep_conf_autorollback" },
{ DB_REP_CONF_BULK, "db_rep_conf_bulk" },
{ DB_REP_CONF_DELAYCLIENT, "db_rep_conf_delayclient" },
+ { DB_REP_CONF_ELECT_LOGLENGTH, "db_rep_conf_elect_loglength" },
{ DB_REP_CONF_INMEM, "db_rep_conf_inmem" },
{ DB_REP_CONF_LEASE, "db_rep_conf_lease" },
{ DB_REP_CONF_NOWAIT, "db_rep_conf_nowait" },
{ DB_REPMGR_CONF_2SITE_STRICT, "db_repmgr_conf_2site_strict" },
{ DB_REPMGR_CONF_ELECTIONS, "db_repmgr_conf_elections" },
+ { DB_REPMGR_CONF_PREFMAS_CLIENT,
+ "db_repmgr_conf_prefmas_client" },
+ { DB_REPMGR_CONF_PREFMAS_MASTER,
+ "db_repmgr_conf_prefmas_master" },
{ 0, NULL }
};
@@ -198,7 +205,9 @@ static const FN config_set_flags_forlog[] = {
{ DB_LOG_DIRECT, "db_direct_log" },
{ DB_LOG_DSYNC, "db_dsync_log" },
{ DB_LOG_AUTO_REMOVE, "db_log_autoremove" },
+ { DB_LOG_BLOB, "db_log_blob" },
{ DB_LOG_IN_MEMORY, "db_log_inmemory" },
+ { DB_LOG_NOSYNC, "db_log_nosync" },
{ 0, NULL }
};
@@ -206,7 +215,9 @@ static const FN config_log_set_config[] = {
{ DB_LOG_DIRECT, "db_log_direct" },
{ DB_LOG_DSYNC, "db_log_dsync" },
{ DB_LOG_AUTO_REMOVE, "db_log_auto_remove" },
+ { DB_LOG_BLOB, "db_log_blob" },
{ DB_LOG_IN_MEMORY, "db_log_in_memory" },
+ { DB_LOG_NOSYNC, "db_log_nosync" },
{ DB_LOG_ZERO, "db_log_zero" },
{ 0, NULL }
};
@@ -237,6 +248,7 @@ static const FN config_set_verbose[] = {
{ DB_VERB_DEADLOCK, "db_verb_deadlock" },
{ DB_VERB_FILEOPS, "db_verb_fileops" },
{ DB_VERB_FILEOPS_ALL, "db_verb_fileops_all" },
+ { DB_VERB_MVCC, "db_verb_mvcc" },
{ DB_VERB_RECOVERY, "db_verb_recovery" },
{ DB_VERB_REGISTER, "db_verb_register" },
{ DB_VERB_REPLICATION, "db_verb_replication" },
@@ -462,7 +474,7 @@ format: __db_errx(env, DB_STR_A("1584",
if ((lv1 = __db_name_to_val(config_rep_timeout, argv[1])) == -1)
goto format;
CFG_GET_UINT32(argv[2], &uv2);
- return (__rep_set_timeout(dbenv, lv1, (db_timeout_t)uv2));
+ return (__rep_set_timeout_pp(dbenv, lv1, (db_timeout_t)uv2));
}
/* repmgr_set_ack_policy db_repmgr_acks_XXX */
@@ -475,6 +487,15 @@ format: __db_errx(env, DB_STR_A("1584",
return (__repmgr_set_ack_policy(dbenv, lv1));
}
+ if (strcasecmp(argv[0], "repmgr_set_incoming_queue_max") == 0) {
+ if (nf != 3)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ CFG_GET_UINT32(argv[2], &uv2);
+ return (__repmgr_set_incoming_queue_max(
+ dbenv, (u_int32_t)uv1, (u_int32_t)uv2));
+ }
+
/*
* Configure name/value pairs of config information for a site (local or
* remote).
@@ -503,7 +524,7 @@ format: __db_errx(env, DB_STR_A("1584",
uv2 = 0;
else
CFG_GET_UINT32(argv[i + 1], &uv2);
- if ((ret = __repmgr_site_config(site,
+ if ((ret = __repmgr_site_config_int(site,
(u_int32_t)lv1, (u_int32_t)uv2)) != 0)
break;
}
@@ -630,6 +651,15 @@ format: __db_errx(env, DB_STR_A("1584",
dbenv, DB_REGION_INIT, lv1 == 0 ? 0 : 1));
}
+ /* set_mutex_failchk_timeout <unsigned timeout> */
+ if (strcasecmp(argv[0], "set_mutex_failchk_timeout") == 0) {
+ if (nf != 2)
+ goto format;
+ CFG_GET_UINT32(argv[1], &uv1);
+ return (__env_set_timeout(
+ dbenv, (u_int32_t)uv1, DB_SET_MUTEX_FAILCHK_TIMEOUT));
+ }
+
/* set_reg_timeout <unsigned timeout> */
if (strcasecmp(argv[0], "set_reg_timeout") == 0) {
if (nf != 2)
diff --git a/src/env/env_failchk.c b/src/env/env_failchk.c
index 05752f07..ad9bed0b 100644
--- a/src/env/env_failchk.c
+++ b/src/env/env_failchk.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2005, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -22,9 +22,26 @@ static int __env_in_api __P((ENV *));
static void __env_clear_state __P((ENV *));
/*
+ * When failchk broadcast is enabled continue after the first error, to try to
+ * find all of them; without broadcasting stop at the first failure.
+ */
+#ifdef HAVE_FAILCHK_BROADCAST
+#define FAILCHK_PROCESS_ERROR(t_ret, ret) \
+ if ((t_ret) != 0 && (ret) == 0) (ret) = (t_ret)
+#else
+#define FAILCHK_PROCESS_ERROR(t_ret, ret) \
+ if (((ret) = (t_ret)) != 0) goto err
+#endif
+
+/*
* __env_failchk_pp --
* ENV->failchk pre/post processing.
*
+ * Single process failchk continues after recoverable failures but stops as
+ * soon as recovery is required. Broadcast failchks continue even after
+ * DB_RUNRECOVERY failures are detected, to maximize the possibility to
+ * wake up processes blocked on dead resources, e.g. mutexes.
+ *
* PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t));
*/
int
@@ -46,7 +63,7 @@ __env_failchk_pp(dbenv, flags)
*/
if (!ALIVE_ON(env)) {
__db_errx(env, DB_STR("1503",
- "DB_ENV->failchk requires DB_ENV->is_alive be configured"));
+ "DB_ENV->failchk requires DB_ENV->is_alive be configured"));
return (EINVAL);
}
@@ -59,10 +76,14 @@ __env_failchk_pp(dbenv, flags)
ENV_LEAVE(env, ip);
return (ret);
}
+
/*
* __env_failchk_int --
* Process the subsystem failchk routines
*
+ * The FAILCHK_PROCESS_ERROR macro (defined at the top of this file)
+ * differs between the broadcast and single process versions of failchk.
+ *
* PUBLIC: int __env_failchk_int __P((DB_ENV *));
*/
int
@@ -70,42 +91,52 @@ __env_failchk_int(dbenv)
DB_ENV *dbenv;
{
ENV *env;
- int ret;
+ int ret, t_ret;
env = dbenv->env;
+ ret = 0;
F_SET(dbenv, DB_ENV_FAILCHK);
/*
- * We check for dead threads in the API first as this would be likely
- * to hang other things we try later, like locks and transactions.
+ * We check for dead threads in the API first as this would likely
+ * hang other things we try later, like locks and transactions.
*/
- if ((ret = __env_in_api(env)) != 0)
+ if ((ret = __env_in_api(env)) != 0) {
+ __db_err(env, ret, "__env_in_api");
goto err;
+ }
- if (LOCKING_ON(env) && (ret = __lock_failchk(env)) != 0)
- goto err;
+ if (LOCKING_ON(env) && (t_ret = __lock_failchk(env)) != 0)
+ FAILCHK_PROCESS_ERROR(t_ret, ret);
- if (TXN_ON(env) &&
- ((ret = __txn_failchk(env)) != 0 ||
- (ret = __dbreg_failchk(env)) != 0))
- goto err;
+ if (TXN_ON(env) && ret == 0 && ((t_ret = __txn_failchk(env)) != 0 ||
+ (t_ret = __dbreg_failchk(env)) != 0))
+ FAILCHK_PROCESS_ERROR(t_ret, ret);
- if ((ret = __memp_failchk(env)) != 0)
- goto err;
+ if ((t_ret = __memp_failchk(env)) != 0)
+ FAILCHK_PROCESS_ERROR(t_ret, ret);
#ifdef HAVE_REPLICATION_THREADS
- if (REP_ON(env) && (ret = __repmgr_failchk(env)) != 0)
- goto err;
+ if (REP_ON(env) && (t_ret = __repmgr_failchk(env)) != 0)
+ FAILCHK_PROCESS_ERROR(t_ret, ret);
#endif
- /* Mark any dead blocked threads as dead. */
- __env_clear_state(env);
+err:
#ifdef HAVE_MUTEX_SUPPORT
- ret = __mut_failchk(env);
+ if ((t_ret = __mutex_failchk(env)) != 0 && ret == 0)
+ ret = t_ret;
#endif
-err: F_CLR(dbenv, DB_ENV_FAILCHK);
+ /* Any dead blocked thread slots are no longer needed; allow reuse. */
+ if (ret == 0)
+ __env_clear_state(env);
+ if (ret == DB_RUNRECOVERY) {
+ /* Announce a panic; avoid __env_panic()'s diag core dump. */
+ __env_panic_set(env, 1);
+ __env_panic_event(env, ret);
+ }
+ F_CLR(dbenv, DB_ENV_FAILCHK);
return (ret);
}
@@ -312,7 +343,8 @@ __env_in_api(env)
REGINFO *infop;
THREAD_INFO *thread;
u_int32_t i;
- int unpin, ret;
+ pid_t pid;
+ int unpin, ret, t_ret;
if ((htab = env->thr_hashtab) == NULL)
return (EINVAL);
@@ -322,10 +354,13 @@ __env_in_api(env)
renv = infop->primary;
thread = R_ADDR(infop, renv->thread_off);
unpin = 0;
+ ret = 0;
for (i = 0; i < env->thr_nbucket; i++)
SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) {
+ pid = ip->dbth_pid;
if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE ||
+ ip->dbth_state == THREAD_BLOCKED_DEAD ||
(ip->dbth_state == THREAD_OUT &&
thread->thr_count < thread->thr_max))
continue;
@@ -341,26 +376,63 @@ __env_in_api(env)
ip->dbth_state = THREAD_SLOT_NOT_IN_USE;
continue;
}
- return (__db_failed(env, DB_STR("1507",
+ /*
+ * The above tests are not atomic, so the process that ip
+ * refers to may have changed while they were running.
+ * In particular, if the process that owned ip when is_alive
+ * was called has since terminated normally, a new process may
+ * have reused the same ip structure and changed its dbth_state
+ * before the next two tests were performed. Therefore, we test
+ * here that all four tests above were done on the same process.
+ * If the process that ip refers to changed, all tests are
+ * invalid and can be ignored.
+ * Similarly, two processes may race to change the dbth_state
+ * of the same ip structure. For example, processes A and B
+ * both reach the above test for the same terminated process
+ * C, whose dbth_state is THREAD_OUT. If A enters the 'if'
+ * block and changes C's dbth_state to THREAD_SLOT_NOT_IN_USE
+ * before B checks the condition, B would incorrectly fail
+ * the test and reach this line. Therefore, we check C's
+ * dbth_state again and fail the db only if C's dbth_state
+ * is indeed THREAD_ACTIVE.
+ */
+ if (ip->dbth_state != THREAD_ACTIVE || ip->dbth_pid != pid)
+ continue;
+ __os_gettime(env, &ip->dbth_failtime, 0);
+ t_ret = __db_failed(env, DB_STR("1507",
"Thread died in Berkeley DB library"),
- ip->dbth_pid, ip->dbth_tid));
+ ip->dbth_pid, ip->dbth_tid);
+ if (ret == 0)
+ ret = t_ret;
+ /*
+ * Classic failchk stops after one dead thread in the
+ * API, but broadcasting looks for all of them.
+ */
+#ifndef HAVE_FAILCHK_BROADCAST
+ return (ret);
+#endif
}
if (unpin == 0)
- return (0);
+ return (ret);
for (i = 0; i < env->thr_nbucket; i++)
SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info)
if (ip->dbth_state == THREAD_BLOCKED_DEAD &&
- (ret = __memp_unpin_buffers(env, ip)) != 0)
+ (t_ret = __memp_unpin_buffers(env, ip)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+#ifndef HAVE_FAILCHK_BROADCAST
return (ret);
+#endif
+ }
- return (0);
+ return (ret);
}
/*
* __env_clear_state --
- * Look for threads which died while blockedi and clear them..
+ * Look for threads which died while blocked and clear them.
*/
static void
__env_clear_state(env)
@@ -441,6 +513,9 @@ __env_set_state(env, ipp, state)
#endif
}
+ /* A failchk thread must not block on a lock -- that would be faulty. */
+ if (state == THREAD_BLOCKED && ip != NULL)
+ DB_ASSERT(env, ip->dbth_state != THREAD_FAILCHK);
/*
* If ipp is not null, return the thread control block if found.
* Check to ensure the thread of control has been registered.
@@ -457,7 +532,9 @@ __env_set_state(env, ipp, state)
*ipp = NULL;
ret = 0;
- if (ip == NULL) {
+ if (ip != NULL)
+ ip->dbth_state = state;
+ else {
infop = env->reginfo;
renv = infop->primary;
thread = R_ADDR(infop, renv->thread_off);
@@ -503,11 +580,13 @@ __env_set_state(env, ipp, state)
init: ip->dbth_pid = id.pid;
ip->dbth_tid = id.tid;
ip->dbth_state = state;
+ for (indx = 0; indx != MUTEX_STATE_MAX; indx++)
+ ip->dbth_latches[indx].mutex = MUTEX_INVALID;
SH_TAILQ_INIT(&ip->dbth_xatxn);
}
MUTEX_UNLOCK(env, renv->mtx_regenv);
- } else
- ip->dbth_state = state;
+ }
+
*ipp = ip;
DB_ASSERT(env, ret == 0);
@@ -535,7 +614,7 @@ __env_thread_id_string(dbenv, pid, tid, buf)
#ifdef UINT64_FMT
char fmt[20];
- snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT);
+ snprintf(fmt, sizeof(fmt), "%s/%s", INT64_FMT, UINT64_FMT);
snprintf(buf,
DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid);
#else
diff --git a/src/env/env_file.c b/src/env/env_file.c
index b102404d..d6e29b21 100644
--- a/src/env/env_file.c
+++ b/src/env/env_file.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2002, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -12,7 +12,7 @@
/*
* __db_file_extend --
- * Initialize a regular file by writing the last page of the file.
+ * Initialize or extend a regular file by writing to its last page.
*
* PUBLIC: int __db_file_extend __P((ENV *, DB_FH *, size_t));
*/
@@ -27,7 +27,19 @@ __db_file_extend(env, fhp, size)
u_int32_t relative;
int ret;
char buf;
+#ifdef HAVE_MMAP_EXTEND
+ unsigned pagesize;
+ /*
+ * Round up size to the VM pagesize. If it isn't aligned, then the bytes
+ * ending the mapping might have no corresponding backing location on
+ * disk, and could be silently lost when the process exits. [#23290]
+ */
+ if (F_ISSET(fhp, DB_FH_REGION)) {
+ pagesize = (unsigned)getpagesize();
+ size = DB_ALIGN(size, pagesize);
+ }
+#endif
buf = '\0';
/*
* Extend the file by writing the last page. If the region is >4Gb,
diff --git a/src/env/env_globals.c b/src/env/env_globals.c
index 955e6738..2d665661 100644
--- a/src/env/env_globals.c
+++ b/src/env/env_globals.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -31,14 +31,21 @@ DB_GLOBALS __db_global_values = {
"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=", /* db_line */
{ 0 }, /* error_buf */
- 0, /* uid_init */
- 0, /* rand_next */
+ 0, /* random_seeded */
+#if defined(HAVE_RANDOM_R)
+ { 0 }, /* random_r random_data */
+ { 0 }, /* random_r state */
+#elif !defined(HAVE_RAND) && !defined(HAVE_RANDOM)
+ 0, /* rand/srand value */
+#endif
0, /* fid_serial */
0, /* db_errno */
- 0, /* num_active_pids */
- 0, /* size_active_pids */
- NULL, /* active_pids */
NULL, /* saved_errstr */
+ "%m/%d %H:%M:%S", /* strftime format for dates */
+#if defined(HAVE_ERROR_HISTORY)
+ 0, /* thread local msgs_key */
+ PTHREAD_ONCE_INIT, /* pthread_once initializer */
+#endif
NULL, /* j_assert */
NULL, /* j_close */
NULL, /* j_dirfree */
diff --git a/src/env/env_method.c b/src/env/env_method.c
index 63deacea..c246febc 100644
--- a/src/env/env_method.c
+++ b/src/env/env_method.c
@@ -1,9 +1,9 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved.
*
- * $Id: env_method.c,v dabaaeb7d839 2010/08/03 17:28:53 mike $
+ * $Id$
*/
#include "db_config.h"
@@ -40,6 +40,7 @@ static int __env_get_app_dispatch
__P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
static int __env_set_app_dispatch
__P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
+static int __env_get_blob_dir __P((DB_ENV *, const char **));
static int __env_set_event_notify
__P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *)));
static int __env_get_feedback __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
@@ -81,6 +82,11 @@ db_env_create(dbenvpp, flags)
if (flags != 0)
return (EINVAL);
+#ifdef HAVE_ERROR_HISTORY
+ /* Call thread local storage initializer at least once per process. */
+ __db_thread_init();
+#endif
+
/* Allocate the DB_ENV and ENV structures -- we always have both. */
if ((ret = __os_calloc(NULL, 1, sizeof(DB_ENV), &dbenv)) != 0)
return (ret);
@@ -159,7 +165,7 @@ __db_env_init(dbenv)
*/
/* DB_ENV PUBLIC HANDLE LIST BEGIN */
dbenv->add_data_dir = __env_add_data_dir;
- dbenv->backup = __db_backup;
+ dbenv->backup = __db_backup_pp;
dbenv->dbbackup = __db_dbbackup_pp;
dbenv->cdsgroup_begin = __cdsgroup_begin_pp;
dbenv->close = __env_close_pp;
@@ -175,6 +181,8 @@ __db_env_init(dbenv)
dbenv->get_cachesize = __memp_get_cachesize;
dbenv->get_backup_callbacks = __env_get_backup_callbacks;
dbenv->get_backup_config = __env_get_backup_config;
+ dbenv->get_blob_dir = __env_get_blob_dir;
+ dbenv->get_blob_threshold = __env_get_blob_threshold_pp;
dbenv->get_create_dir = __env_get_create_dir;
dbenv->get_data_dirs = __env_get_data_dirs;
dbenv->get_data_len = __env_get_data_len;
@@ -269,7 +277,7 @@ __db_env_init(dbenv)
dbenv->open = __env_open_pp;
dbenv->remove = __env_remove;
dbenv->rep_elect = __rep_elect_pp;
- dbenv->rep_flush = __rep_flush;
+ dbenv->rep_flush = __rep_flush_pp;
dbenv->rep_get_clockskew = __rep_get_clockskew;
dbenv->rep_get_config = __rep_get_config;
dbenv->rep_get_limit = __rep_get_limit;
@@ -282,29 +290,34 @@ __db_env_init(dbenv)
dbenv->rep_set_config = __rep_set_config;
dbenv->rep_set_limit = __rep_set_limit;
dbenv->rep_set_nsites = __rep_set_nsites_pp;
- dbenv->rep_set_priority = __rep_set_priority;
+ dbenv->rep_set_priority = __rep_set_priority_pp;
dbenv->rep_set_request = __rep_set_request;
- dbenv->rep_set_timeout = __rep_set_timeout;
+ dbenv->rep_set_timeout = __rep_set_timeout_pp;
dbenv->rep_set_transport = __rep_set_transport_pp;
+ dbenv->rep_set_view = __rep_set_view;
dbenv->rep_start = __rep_start_pp;
dbenv->rep_stat = __rep_stat_pp;
dbenv->rep_stat_print = __rep_stat_print_pp;
dbenv->rep_sync = __rep_sync;
dbenv->repmgr_channel = __repmgr_channel;
dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy;
+ dbenv->repmgr_get_incoming_queue_max = __repmgr_get_incoming_queue_max;
dbenv->repmgr_local_site = __repmgr_local_site;
dbenv->repmgr_msg_dispatch = __repmgr_set_msg_dispatch;
dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy;
+ dbenv->repmgr_set_incoming_queue_max = __repmgr_set_incoming_queue_max;
dbenv->repmgr_site = __repmgr_site;
dbenv->repmgr_site_by_eid = __repmgr_site_by_eid;
- dbenv->repmgr_site_list = __repmgr_site_list;
- dbenv->repmgr_start = __repmgr_start;
+ dbenv->repmgr_site_list = __repmgr_site_list_pp;
+ dbenv->repmgr_start = __repmgr_start_pp;
dbenv->repmgr_stat = __repmgr_stat_pp;
dbenv->repmgr_stat_print = __repmgr_stat_print_pp;
dbenv->set_alloc = __env_set_alloc;
dbenv->set_app_dispatch = __env_set_app_dispatch;
dbenv->set_backup_callbacks = __env_set_backup_callbacks;
dbenv->set_backup_config = __env_set_backup_config;
+ dbenv->set_blob_dir = __env_set_blob_dir;
+ dbenv->set_blob_threshold = __env_set_blob_threshold;
dbenv->set_cache_max = __memp_set_cache_max;
dbenv->set_cachesize = __memp_set_cachesize;
dbenv->set_create_dir = __env_set_create_dir;
@@ -370,10 +383,11 @@ __db_env_init(dbenv)
dbenv->thread_id = __os_id;
dbenv->thread_id_string = __env_thread_id_string;
+ dbenv->mutex_failchk_timeout = US_PER_SEC;
+
env = dbenv->env;
__os_id(NULL, &env->pid_cache, NULL);
- env->db_ref = 0;
env->log_verify_wrap = __log_verify_wrap;
env->data_len = ENV_DEF_DATA_LEN;
TAILQ_INIT(&env->fdlist);
@@ -561,6 +575,97 @@ __env_get_memory_init(dbenv, type, countp)
}
/*
+ * __env_get_blob_threshold_pp --
+ * Get the blob threshold for the environment. Any data item larger
+ * than the blob threshold is automatically saved as a blob file.
+ *
+ * PUBLIC: int __env_get_blob_threshold_pp
+ * PUBLIC: __P ((DB_ENV *, u_int32_t *));
+ */
+int
+__env_get_blob_threshold_pp(dbenv, bytes)
+ DB_ENV *dbenv;
+ u_int32_t *bytes;
+{
+ ENV *env;
+ DB_THREAD_INFO *ip;
+ int ret;
+
+ env = dbenv->env;
+
+ ENV_ENTER(env, ip);
+ ret = __env_get_blob_threshold_int(env, bytes);
+ ENV_LEAVE(env, ip);
+
+ return (ret);
+}
+
+/*
+ * __env_get_blob_threshold_int --
+ * Get the blob threshold for the environment. Any data item larger
+ * than the blob threshold is automatically saved as a blob file.
+ *
+ * PUBLIC: int __env_get_blob_threshold_int
+ * PUBLIC: __P ((ENV *, u_int32_t *));
+ */
+int
+__env_get_blob_threshold_int(env, bytes)
+ ENV *env;
+ u_int32_t *bytes;
+{
+ REGENV *renv;
+ REGINFO *infop;
+
+ if (F_ISSET(env, ENV_OPEN_CALLED)) {
+ infop = env->reginfo;
+ renv = infop->primary;
+ MUTEX_LOCK(env, renv->mtx_regenv);
+ *bytes = renv->blob_threshold;
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+ } else
+ *bytes = env->dbenv->blob_threshold;
+
+ return (0);
+}
+
+/*
+ * __env_set_blob_threshold --
+ * Set the default blob threshold for the environment. Any data item larger
+ * than the blob threshold is automatically saved as a blob file.
+ *
+ * PUBLIC: int __env_set_blob_threshold __P((DB_ENV *, u_int32_t, u_int32_t));
+ */
+int
+__env_set_blob_threshold(dbenv, bytes, flags)
+ DB_ENV *dbenv;
+ u_int32_t bytes;
+ u_int32_t flags;
+{
+ ENV *env;
+ REGENV *renv;
+ REGINFO *infop;
+ DB_THREAD_INFO *ip;
+
+ env = dbenv->env;
+
+ if (__db_fchk(dbenv->env, "DB_ENV->set_blob_threshold", flags, 0) != 0)
+ return (EINVAL);
+
+ if (F_ISSET(env, ENV_OPEN_CALLED)) {
+ infop = env->reginfo;
+ renv = infop->primary;
+ ENV_ENTER(env, ip);
+ MUTEX_LOCK(env, renv->mtx_regenv);
+ renv->blob_threshold = bytes;
+ MUTEX_UNLOCK(env, renv->mtx_regenv);
+ ENV_LEAVE(env, ip);
+ } else
+ dbenv->blob_threshold = bytes;
+
+ return (0);
+}
+
+/*
* __env_set_memory_init --
* DB_ENV->set_memory_init.
*
@@ -697,6 +802,43 @@ __env_set_app_dispatch(dbenv, app_dispatch)
}
/*
+ * __env_set_blob_dir --
+ * API to allow the user to override the default blob file
+ * root directory. Must be set if blobs are enabled and an
+ * unnamed environment is created.
+ *
+ * PUBLIC: int __env_set_blob_dir __P((DB_ENV *, const char *));
+ */
+int
+__env_set_blob_dir(dbenv, dir)
+ DB_ENV *dbenv;
+ const char *dir;
+{
+ ENV *env;
+
+ env = dbenv->env;
+
+ ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_blob_dir");
+
+ if (dbenv->db_blob_dir != NULL)
+ __os_free(env, dbenv->db_blob_dir);
+ return (__os_strdup(env, dir, &dbenv->db_blob_dir));
+}
+
+/*
+ * __env_get_blob_dir --
+ * Get the blob file root directory.
+ */
+static int
+__env_get_blob_dir(dbenv, dirp)
+ DB_ENV *dbenv;
+ const char **dirp;
+{
+ *dirp = dbenv->db_blob_dir;
+ return (0);
+}
+
+/*
* __env_get_encrypt_flags --
* {DB_ENV,DB}->get_encrypt_flags.
*
@@ -1061,6 +1203,10 @@ __env_set_backup(env, on)
return (EINVAL);
}
+ /*
+ * This code does not need env_rep_enter for the checkpoint because
+ * it can only happen if there is an active bulk txn existing.
+ */
if (needs_checkpoint && (ret = __txn_checkpoint(env, 0, 0, 0)))
return (ret);
return (0);
@@ -1244,6 +1390,11 @@ __env_set_data_len(dbenv, data_len)
DB_ENV *dbenv;
u_int32_t data_len;
{
+ if (data_len == 0) {
+ __db_errx(dbenv->env, DB_STR("1593",
+"Maximum number of bytes to display for each key/data item can not be 0."));
+ return (EINVAL);
+ }
dbenv->env->data_len = data_len;
return (0);
@@ -1720,6 +1871,7 @@ __env_get_verbose(dbenv, which, onoffp)
case DB_VERB_DEADLOCK:
case DB_VERB_FILEOPS:
case DB_VERB_FILEOPS_ALL:
+ case DB_VERB_MVCC:
case DB_VERB_RECOVERY:
case DB_VERB_REGISTER:
case DB_VERB_REPLICATION:
@@ -1758,6 +1910,7 @@ __env_set_verbose(dbenv, which, on)
case DB_VERB_DEADLOCK:
case DB_VERB_FILEOPS:
case DB_VERB_FILEOPS_ALL:
+ case DB_VERB_MVCC:
case DB_VERB_RECOVERY:
case DB_VERB_REGISTER:
case DB_VERB_REPLICATION:
@@ -1888,9 +2041,15 @@ __env_get_timeout(dbenv, timeoutp, flags)
int ret;
ret = 0;
- if (flags == DB_SET_REG_TIMEOUT) {
+ if (flags == DB_SET_REG_TIMEOUT)
*timeoutp = dbenv->envreg_timeout;
- } else
+ else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT)
+#ifdef HAVE_FAILCHK_BROADCAST
+ *timeoutp = dbenv->mutex_failchk_timeout;
+#else
+ ret = USR_ERR(dbenv->env, DB_OPNOTSUP);
+#endif
+ else
ret = __lock_get_env_timeout(dbenv, timeoutp, flags);
return (ret);
}
@@ -1912,6 +2071,12 @@ __env_set_timeout(dbenv, timeout, flags)
ret = 0;
if (flags == DB_SET_REG_TIMEOUT)
dbenv->envreg_timeout = timeout;
+ else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT)
+#ifdef HAVE_FAILCHK_BROADCAST
+ dbenv->mutex_failchk_timeout = timeout;
+#else
+ ret = USR_ERR(dbenv->env, DB_OPNOTSUP);
+#endif
else
ret = __lock_set_env_timeout(dbenv, timeout, flags);
return (ret);
diff --git a/src/env/env_name.c b/src/env/env_name.c
index a3a0b371..d0dd5635 100644
--- a/src/env/env_name.c
+++ b/src/env/env_name.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -9,6 +9,7 @@
#include "db_config.h"
#include "db_int.h"
+#include "dbinc/blob.h"
static int __db_fullpath
__P((ENV *, const char *, const char *, int, int, char **));
@@ -122,7 +123,7 @@ __db_appname(env, appname, file, dirp, namep)
{
DB_ENV *dbenv;
char **ddp;
- const char *dir;
+ const char *blob_dir, *dir;
int ret;
dbenv = env->dbenv;
@@ -141,6 +142,8 @@ __db_appname(env, appname, file, dirp, namep)
/*
* DB_APP_NONE:
* DB_HOME/file
+ * DB_APP_BLOB:
+ * DB_HOME/DB_BLOB_DIR/file
* DB_APP_DATA:
* DB_HOME/DB_DATA_DIR/file
* DB_APP_LOG:
@@ -151,6 +154,12 @@ __db_appname(env, appname, file, dirp, namep)
switch (appname) {
case DB_APP_NONE:
break;
+ case DB_APP_BLOB:
+ if (dbenv != NULL && dbenv->db_blob_dir != NULL)
+ dir = dbenv->db_blob_dir;
+ else
+ dir = BLOB_DEFAULT_DIR;
+ break;
case DB_APP_RECOVER:
case DB_APP_DATA:
/*
@@ -164,6 +173,13 @@ __db_appname(env, appname, file, dirp, namep)
/* Second, look in the environment home directory. */
DB_CHECKFILE(file, NULL, 1, 0, namep, dirp);
+ /* Third, check the blob directory. */
+ if (dbenv != NULL && dbenv->db_blob_dir != NULL)
+ blob_dir = dbenv->db_blob_dir;
+ else
+ blob_dir = BLOB_DEFAULT_DIR;
+ DB_CHECKFILE(file, blob_dir, 1, 0, namep, dirp);
+
/*
* Otherwise, we're going to create. Use the specified
* directory unless we're in recovery and it doesn't exist.
diff --git a/src/env/env_open.c b/src/env/env_open.c
index 7eddca3a..85189369 100644
--- a/src/env/env_open.c
+++ b/src/env/env_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -107,10 +107,16 @@ __env_open_pp(dbenv, db_home, flags, mode)
__db_errx(env, DB_STR("1589", "DB_PRIVATE is not "
"supported by 64-bit applications in "
"mixed-size-addressing mode"));
- return (EINVAL);
- }
+ return (EINVAL);
+ }
#endif
+ if (LF_ISSET(DB_PRIVATE) && PREFMAS_IS_SET(env)) {
+ __db_errx(env, DB_STR("1594", "DB_PRIVATE is not "
+ "supported in Replication Manager preferred master mode"));
+ return (EINVAL);
+ }
+
return (__env_open(dbenv, db_home, flags, mode));
}
@@ -129,12 +135,20 @@ __env_open(dbenv, db_home, flags, mode)
{
DB_THREAD_INFO *ip;
ENV *env;
- u_int32_t orig_flags;
- int register_recovery, ret, t_ret;
+ u_int32_t orig_flags, retry_flags;
+ int recovery_failed, register_recovery, ret, t_ret;
+ char *old_passwd;
+ size_t old_passwd_len;
+ u_int32_t old_encrypt_flags;
ip = NULL;
env = dbenv->env;
+ recovery_failed = 1;
register_recovery = 0;
+ retry_flags = 0;
+ old_passwd = NULL;
+ old_passwd_len = 0;
+ old_encrypt_flags = 0;
/* Initial configuration. */
if ((ret = __env_config(dbenv, db_home, &flags, mode)) != 0)
@@ -171,13 +185,27 @@ __env_open(dbenv, db_home, flags, mode)
dbenv->is_alive = __envreg_isalive;
}
- if ((ret =
- __envreg_register(env, &register_recovery, flags)) != 0)
+ /*
+ * Back up the current key, because it would be consumed by
+ * __envreg_register below.
+ */
+ if (dbenv->passwd != NULL) {
+ if ((ret = __os_strdup(env, dbenv->passwd, &old_passwd)) != 0)
+ goto err;
+ old_passwd_len = dbenv->passwd_len;
+ (void)__env_get_encrypt_flags(dbenv, &old_encrypt_flags);
+ }
+
+ F_SET(dbenv, DB_ENV_NOPANIC);
+ ret = __envreg_register(env, &register_recovery, flags);
+ dbenv->flags = orig_flags;
+ if (ret != 0)
goto err;
if (register_recovery) {
if (!LF_ISSET(DB_RECOVER)) {
__db_errx(env, DB_STR("1567",
"The DB_RECOVER flag was not specified, and recovery is needed"));
+ recovery_failed = 0;
ret = DB_RUNRECOVERY;
goto err;
}
@@ -197,16 +225,27 @@ __env_open(dbenv, db_home, flags, mode)
* want to remove files left over for any reason, from any session.
*/
retry: if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))
+ if (
#ifdef HAVE_REPLICATION
- if ((ret = __rep_reset_init(env)) != 0 ||
- (ret = __env_remove_env(env)) != 0 ||
-#else
- if ((ret = __env_remove_env(env)) != 0 ||
+ (ret = __rep_reset_init(env)) != 0 ||
#endif
- (ret = __env_refresh(dbenv, orig_flags, 0)) != 0)
+ (ret = __env_remove_env(env)) != 0 ||
+ (ret = __env_refresh(dbenv,
+ orig_flags | retry_flags, 0)) != 0)
goto err;
- if ((ret = __env_attach_regions(dbenv, flags, orig_flags, 1)) != 0)
+ /* Restore the database key. */
+ if (LF_ISSET(DB_REGISTER) && old_passwd != NULL) {
+ ret = __env_set_encrypt(dbenv, old_passwd, old_encrypt_flags);
+ memset(old_passwd, 0xff, old_passwd_len - 1);
+ __os_free(env, old_passwd);
+ if (ret != 0)
+ goto err;
+ }
+
+ DB_ASSERT(env, ret == 0);
+ if ((ret = __env_attach_regions(dbenv,
+ flags, orig_flags | retry_flags, 1)) != 0)
goto err;
/*
@@ -216,8 +255,18 @@ retry: if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))
*/
if (LF_ISSET(DB_FAILCHK) && !register_recovery) {
ENV_ENTER(env, ip);
- if ((ret = __env_failchk_int(dbenv)) != 0)
+ /*
+ * Set the thread state so that any wait on a potentially
+ * dead thread will call is_alive() in order to avoid hanging.
+ */
+ FAILCHK_THREAD(env, ip);
+ ret = __env_failchk_int(dbenv);
+ if (ret != 0) {
+ __db_err(env, ret,
+ DB_STR("1595",
+ "failchk crash after clean registry"));
goto err;
+ }
ENV_LEAVE(env, ip);
}
@@ -230,12 +279,12 @@ err: if (ret != 0)
* processes can now proceed.
*
* If recovery failed, unregister now and let another process
- * clean up.
+ * clean up and run recovery.
*/
if (ret == 0 && (t_ret = __envreg_xunlock(env)) != 0)
ret = t_ret;
if (ret != 0)
- (void)__envreg_unregister(env, 1);
+ (void)__envreg_unregister(env, recovery_failed);
}
/*
@@ -247,7 +296,11 @@ err: if (ret != 0)
*/
if (ret == DB_RUNRECOVERY && !register_recovery &&
!LF_ISSET(DB_RECOVER) && LF_ISSET(DB_REGISTER)) {
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, DB_STR("1596",
+ "env_open DB_REGISTER w/o RECOVER panic: trying w/recovery"));
LF_SET(DB_RECOVER);
+ retry_flags = DB_ENV_NOPANIC;
goto retry;
}
@@ -304,6 +357,9 @@ __env_open_arg(dbenv, flags)
"replication requires transaction support"));
return (EINVAL);
}
+ if ((ret =
+ __log_set_config_int(dbenv, DB_LOG_BLOB, 1, 1)) != 0)
+ return (ret);
}
if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
if ((ret = __db_fcchk(env,
@@ -349,30 +405,6 @@ __env_open_arg(dbenv, flags)
}
#endif
-#ifdef HAVE_MUTEX_FCNTL
- /*
- * !!!
- * We need a file descriptor for fcntl(2) locking. We use the file
- * handle from the REGENV file for this purpose.
- *
- * Since we may be using shared memory regions, e.g., shmget(2), and
- * not a mapped-in regular file, the backing file may be only a few
- * bytes in length. So, this depends on the ability to call fcntl to
- * lock file offsets much larger than the actual physical file. I
- * think that's safe -- besides, very few systems actually need this
- * kind of support, SunOS is the only one still in wide use of which
- * I'm aware.
- *
- * The error case is if an application lacks spinlocks and wants to be
- * threaded. That doesn't work because fcntl will lock the underlying
- * process, including all its threads.
- */
- if (F_ISSET(env, ENV_THREAD)) {
- __db_errx(env, DB_STR("1578",
- "architecture lacks fast mutexes: applications cannot be threaded"));
- return (EINVAL);
- }
-#endif
return (ret);
}
@@ -506,7 +538,7 @@ __env_close_pp(dbenv, flags)
{
DB_THREAD_INFO *ip;
ENV *env;
- int rep_check, ret, t_ret;
+ int ret, t_ret;
u_int32_t close_flags, flags_orig;
env = dbenv->env;
@@ -517,65 +549,75 @@ __env_close_pp(dbenv, flags)
* Validate arguments, but as a DB_ENV handle destructor, we can't
* fail.
*/
- if (flags != 0 && flags != DB_FORCESYNC &&
- (t_ret = __db_ferr(env, "DB_ENV->close", 0)) != 0 && ret == 0)
- ret = t_ret;
+#undef OKFLAGS
+#define OKFLAGS (DB_FORCESYNC | DB_FORCESYNCENV)
+
+ ret = __db_fchk(env, "DB_ENV->close", flags, OKFLAGS);
#define DBENV_FORCESYNC 0x00000001
#define DBENV_CLOSE_REPCHECK 0x00000010
- if (flags == DB_FORCESYNC)
+ if (LF_ISSET(DB_FORCESYNC))
close_flags |= DBENV_FORCESYNC;
+ if (LF_ISSET(DB_FORCESYNCENV))
+ F_SET(env, ENV_FORCESYNCENV);
+
+ /*
+ * Call __env_close() to clean up resources even if the open
+ * didn't fully succeed.
+ */
+ if (!F_ISSET(env, ENV_OPEN_CALLED))
+ goto do_close;
/*
* If the environment has panic'd, all we do is try and discard
* the important resources.
*/
if (PANIC_ISSET(env)) {
+ /*
+ * Temporarily set no panic so we do not trigger the
+ * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwrite thus
+ * allowing the unregister to happen correctly.
+ */
+ flags_orig = dbenv->flags;
+ F_SET(dbenv, DB_ENV_NOPANIC);
+ ENV_ENTER(env, ip);
/* clean up from registry file */
- if (dbenv->registry != NULL) {
- /*
- * Temporarily set no panic so we do not trigger the
- * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwr
- * thus allowing the unregister to happen correctly.
- */
- flags_orig = F_ISSET(dbenv, DB_ENV_NOPANIC);
- F_SET(dbenv, DB_ENV_NOPANIC);
+ if (dbenv->registry != NULL)
(void)__envreg_unregister(env, 0);
- dbenv->registry = NULL;
- if (!flags_orig)
- F_CLR(dbenv, DB_ENV_NOPANIC);
- }
/* Close all underlying threads and sockets. */
- if (IS_ENV_REPLICATED(env))
- (void)__repmgr_close(env);
+ (void)__repmgr_close(env);
/* Close all underlying file handles. */
(void)__file_handle_cleanup(env);
+ ENV_LEAVE(env, ip);
+
+ dbenv->flags = flags_orig;
+ (void)__env_region_cleanup(env);
- PANIC_CHECK(env);
+ return (__env_panic_msg(env));
}
ENV_ENTER(env, ip);
- rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
- if (rep_check) {
#ifdef HAVE_REPLICATION_THREADS
- /*
- * Shut down Replication Manager threads first of all. This
- * must be done before __env_rep_enter to avoid a deadlock that
- * could occur if repmgr's background threads try to do a rep
- * operation that needs __rep_lockout.
- */
- if ((t_ret = __repmgr_close(env)) != 0 && ret == 0)
- ret = t_ret;
+ /*
+ * Shut down Replication Manager threads first of all. This
+ * must be done before __env_rep_enter to avoid a deadlock that
+ * could occur if repmgr's background threads try to do a rep
+ * operation that needs __rep_lockout.
+ */
+ if ((t_ret = __repmgr_close(env)) != 0 && ret == 0)
+ ret = t_ret;
#endif
+ if (IS_ENV_REPLICATED(env)) {
if ((t_ret = __env_rep_enter(env, 0)) != 0 && ret == 0)
ret = t_ret;
+ if (ret == 0)
+ close_flags |= DBENV_CLOSE_REPCHECK;
}
- if (rep_check)
- close_flags |= DBENV_CLOSE_REPCHECK;
+do_close:
if ((t_ret = __env_close(dbenv, close_flags)) != 0 && ret == 0)
ret = t_ret;
@@ -640,8 +682,11 @@ __env_close(dbenv, flags)
t_ret = dbp->alt_close(dbp, close_flags);
else
t_ret = __db_close(dbp, NULL, close_flags);
- if (t_ret != 0 && ret == 0)
- ret = t_ret;
+ if (t_ret != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ break;
+ }
}
/*
@@ -661,10 +706,8 @@ __env_close(dbenv, flags)
#endif
/* If we're registered, clean up. */
- if (dbenv->registry != NULL) {
+ if (dbenv->registry != NULL)
(void)__envreg_unregister(env, 0);
- dbenv->registry = NULL;
- }
/* Check we've closed all underlying file handles. */
if ((t_ret = __file_handle_cleanup(env)) != 0 && ret == 0)
@@ -680,6 +723,9 @@ __env_close(dbenv, flags)
if (dbenv->db_md_dir != NULL)
__os_free(env, dbenv->db_md_dir);
dbenv->db_md_dir = NULL;
+ if (dbenv->db_blob_dir != NULL)
+ __os_free(env, dbenv->db_blob_dir);
+ dbenv->db_blob_dir = NULL;
if (dbenv->db_data_dir != NULL) {
for (p = dbenv->db_data_dir; *p != NULL; ++p)
__os_free(env, *p);
@@ -761,9 +807,7 @@ __env_refresh(dbenv, orig_flags, rep_check)
ret = t_ret;
}
- /* Discard the DB_ENV, ENV handle mutexes. */
- if ((t_ret = __mutex_free(env, &dbenv->mtx_db_env)) != 0 && ret == 0)
- ret = t_ret;
+ /* Discard the ENV handle mutex. */
if ((t_ret = __mutex_free(env, &env->mtx_env)) != 0 && ret == 0)
ret = t_ret;
@@ -936,17 +980,38 @@ __file_handle_cleanup(env)
ENV *env;
{
DB_FH *fhp;
+ DB_MPOOL *dbmp;
+ u_int i;
- if (TAILQ_FIRST(&env->fdlist) == NULL)
+ if (TAILQ_EMPTY(&env->fdlist))
return (0);
- __db_errx(env, DB_STR("1581",
- "File handles still open at environment close"));
+ __db_errx(env,
+ DB_STR("1581", "File handles still open at environment close"));
while ((fhp = TAILQ_FIRST(&env->fdlist)) != NULL) {
- __db_errx(env, DB_STR_A("1582", "Open file handle: %s", "%s"),
- fhp->name);
- (void)__os_closehandle(env, fhp);
+ __db_errx(env,
+ DB_STR_A("1582", "Open file handle: %s", "%s"), fhp->name);
+ if (__os_closehandle(env, fhp) != 0)
+ break;
}
+ if (env->lockfhp != NULL)
+ env->lockfhp = NULL;
+ /* Invalidate saved pointers to the regions' files: all are closed. */
+ if (env->reginfo != NULL)
+ env->reginfo->fhp = NULL;
+ if (env->lg_handle != NULL)
+ env->lg_handle->reginfo.fhp = NULL;
+ if (env->lk_handle != NULL)
+ env->lk_handle->reginfo.fhp = NULL;
+#ifdef HAVE_MUTEX_SUPPORT
+ if (env->mutex_handle != NULL)
+ env->mutex_handle->reginfo.fhp = NULL;
+#endif
+ if (env->tx_handle != NULL)
+ env->tx_handle->reginfo.fhp = NULL;
+ if ((dbmp = env->mp_handle) != NULL && dbmp->reginfo != NULL)
+ for (i = 0; i < env->dbenv->mp_ncache; ++i)
+ dbmp->reginfo[i].fhp = NULL;
return (EINVAL);
}
@@ -1109,11 +1174,9 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok)
goto err;
/*
- * Initialize the handle mutexes.
+ * Initialize the handle mutex.
*/
if ((ret = __mutex_alloc(env,
- MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_db_env)) != 0 ||
- (ret = __mutex_alloc(env,
MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &env->mtx_env)) != 0)
goto err;
@@ -1125,8 +1188,15 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok)
goto err;
rep_check = IS_ENV_REPLICATED(env) ? 1 : 0;
- if (rep_check && (ret = __env_rep_enter(env, 0)) != 0)
+ if (rep_check && (ret = __env_rep_enter(env, 0)) != 0) {
+ /*
+ * If we get an error we didn't increment handle_cnt,
+ * so we don't want to decrement it later. Turn off
+ * rep_check here.
+ */
+ rep_check = 0;
goto err;
+ }
if (LF_ISSET(DB_INIT_MPOOL)) {
if ((ret = __memp_open(env, create_ok)) != 0)
diff --git a/src/env/env_recover.c b/src/env/env_recover.c
index 9636554a..fb7ddee7 100644
--- a/src/env/env_recover.c
+++ b/src/env/env_recover.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -18,17 +18,15 @@
#include "dbinc/qam.h"
#include "dbinc/txn.h"
-#ifndef lint
-static const char copyright[] =
- "Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.\n";
-#endif
-
static int __db_log_corrupt __P((ENV *, DB_LSN *));
static int __env_init_rec_42 __P((ENV *));
static int __env_init_rec_43 __P((ENV *));
static int __env_init_rec_46 __P((ENV *));
static int __env_init_rec_47 __P((ENV *));
static int __env_init_rec_48 __P((ENV *));
+static int __env_init_rec_53 __P((ENV *));
+static int __env_init_rec_60 __P((ENV *));
+static int __env_init_rec_60p1 __P((ENV *));
static int __log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *));
static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
@@ -632,6 +630,12 @@ err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0)
dbenv->tx_timestamp = 0;
+ /*
+ * Failure means that the env has panicked. Disable locking so that the
+ * env can close without its mutex calls causing additional panics.
+ */
+ if (ret != 0)
+ F_SET(env->dbenv, DB_ENV_NOLOCKING);
F_CLR(env->lg_handle, DBLOG_RECOVER);
F_CLR(region, TXN_IN_RECOVERY);
@@ -690,7 +694,8 @@ __lsn_diff(low, high, current, max, is_forward)
* is trying to sync up with a master whose max LSN is less than this
* client's max lsn; we want to roll back everything after that.
*
- * Find the latest checkpoint whose ckp_lsn is less than the max lsn.
+ * Find the latest checkpoint less than or equal to max lsn and
+ * return the ckp_lsn from that checkpoint.
*/
static int
__log_backup(env, logc, max_lsn, start_lsn)
@@ -713,10 +718,11 @@ __log_backup(env, logc, max_lsn, start_lsn)
return (ret);
/*
* Follow checkpoints through the log until
- * we find one with a ckp_lsn less than
- * or equal max_lsn.
+ * we find one less than or equal to max_lsn.
+ * Then return the ckp_lsn from that checkpoint as it
+ * is our earliest outstanding txn needed.
*/
- if (LOG_COMPARE(&ckp_args->ckp_lsn, max_lsn) <= 0) {
+ if (LOG_COMPARE(&lsn, max_lsn) <= 0) {
*start_lsn = ckp_args->ckp_lsn;
break;
}
@@ -727,7 +733,7 @@ __log_backup(env, logc, max_lsn, start_lsn)
* done. Break with DB_NOTFOUND.
*/
if (IS_ZERO_LSN(lsn)) {
- ret = DB_NOTFOUND;
+ ret = USR_ERR(env, DB_NOTFOUND);
break;
}
__os_free(env, ckp_args);
@@ -880,6 +886,9 @@ __db_log_corrupt(env, lsnp)
/*
* __env_init_rec --
*
+ * Install recover functions in the environment. Whenever this is updated,
+ * corresponding changes are needed by db_printlog's env_init_print().
+ *
* PUBLIC: int __env_init_rec __P((ENV *, u_int32_t));
*/
int
@@ -924,6 +933,29 @@ __env_init_rec(env, version)
* oldest revision that applies must be used. Therefore we override
* the recovery functions in reverse log version order.
*/
+ if (version == DB_LOGVERSION)
+ goto done;
+
+ /* DB_LOGVERSION_61 added the blob file id to the dbreg logs. */
+ if (version > DB_LOGVERSION_60p1)
+ goto done;
+ if ((ret = __env_init_rec_60p1(env)) != 0)
+ goto err;
+
+ /*
+ * DB_LOGVERSION_60p1 changed the two u_int32_t offset fields in the
+ * log for fop_write_file into a single int64.
+ */
+ if (version > DB_LOGVERSION_60)
+ goto done;
+ if ((ret = __env_init_rec_60(env)) != 0)
+ goto err;
+
+ /* DB_LOGVERSION_53 changed the heap addrem log record. */
+ if (version > DB_LOGVERSION_53)
+ goto done;
+ if ((ret = __env_init_rec_53(env)) != 0)
+ goto err;
/*
* DB_LOGVERSION_53 is a strict superset of DB_LOGVERSION_50.
* So, only check > DB_LOGVERSION_48p2. If/When log records are
@@ -931,6 +963,8 @@ __env_init_rec(env, version)
*/
if (version > DB_LOGVERSION_48p2)
goto done;
+ if (version >= DB_LOGVERSION_50)
+ goto done;
if ((ret = __env_init_rec_48(env)) != 0)
goto err;
/*
@@ -1091,3 +1125,77 @@ __env_init_rec_48(env)
err:
return (ret);
}
+
+static int
+__env_init_rec_53(env)
+ ENV *env;
+{
+ int ret;
+
+#ifdef HAVE_HEAP
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __heap_addrem_50_recover, DB___heap_addrem_50)) != 0)
+ goto err;
+#else
+ COMPQUIET(env, NULL);
+ COMPQUIET(ret, 0);
+ goto err;
+#endif
+err:
+ return (ret);
+}
+
+static int
+__env_init_rec_60(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_create_60_recover, DB___fop_create_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_remove_60_recover, DB___fop_remove_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_rename_60_recover, DB___fop_rename_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_rename_noundo_60_recover, DB___fop_rename_noundo_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_file_remove_60_recover, DB___fop_file_remove_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_write_60_recover, DB___fop_write_60)) != 0)
+ goto err;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __fop_write_file_60_recover, DB___fop_write_file_60)) != 0)
+ goto err;
+err:
+ return (ret);
+}
+
+static int
+__env_init_rec_60p1(env)
+ ENV *env;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __dbreg_register_42_recover, DB___dbreg_register_42)) != 0)
+ goto err;
+#ifdef HAVE_HEAP
+ if ((ret = __db_add_recovery_int(env, &env->recover_dtab,
+ __heap_addrem_60_recover, DB___heap_addrem_60)) != 0)
+ goto err;
+#endif
+err:
+ return (ret);
+}
diff --git a/src/env/env_region.c b/src/env/env_region.c
index 113bea21..cf7085b7 100644
--- a/src/env/env_region.c
+++ b/src/env/env_region.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -90,8 +90,11 @@ loop: renv = NULL;
* it's actually a creation or not, and we'll have to fall-back to a
* join if it's not a create.
*/
- if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL)
+ if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL) {
+ DB_DEBUG_MSG(env, "env_attach: creating %s",
+ F_ISSET(env, ENV_PRIVATE) ? "private" : "user map func");
goto creation;
+ }
/*
* Try to create the file, if we have the authority. We have to ensure
@@ -179,14 +182,15 @@ loop: renv = NULL;
* something in the region file other than meta-data and that
* shouldn't happen.
*/
- if (size < sizeof(ref))
+ if (size < sizeof(ref)) {
+ DB_DEBUG_MSG(env, "region size %d is too small", (int)size);
goto retry;
- else {
+ } else {
if (size == sizeof(ref))
F_SET(env, ENV_SYSTEM_MEM);
else if (F_ISSET(env, ENV_SYSTEM_MEM)) {
- ret = EINVAL;
+ ret = USR_ERR(env, EINVAL);
__db_err(env, ret, DB_STR_A("1535",
"%s: existing environment not created in system memory",
"%s"), infop->name);
@@ -197,6 +201,7 @@ loop: renv = NULL;
nrw < (size_t)sizeof(rbuf) ||
(ret = __os_seek(env,
env->lockfhp, 0, 0, rbuf.region_off)) != 0) {
+ ret = USR_ERR(env, ret);
__db_err(env, ret, DB_STR_A("1536",
"%s: unable to read region info", "%s"),
infop->name);
@@ -207,7 +212,8 @@ loop: renv = NULL;
if ((ret = __os_read(env, env->lockfhp, &ref,
sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) {
if (ret == 0)
- ret = EIO;
+ ret = USR_ERR(env, EIO);
+ (void)USR_ERR(env, ret);
__db_err(env, ret, DB_STR_A("1537",
"%s: unable to read system-memory information",
"%s"), infop->name);
@@ -218,18 +224,16 @@ loop: renv = NULL;
segid = ref.segid;
}
-#ifndef HAVE_MUTEX_FCNTL
/*
- * If we're not doing fcntl locking, we can close the file handle. We
- * no longer need it and the less contact between the buffer cache and
- * the VM, the better.
+ * We no longer need the file handle; the less contact between the
+ * buffer cache and the VM, the better.
*/
(void)__os_closehandle(env, env->lockfhp);
env->lockfhp = NULL;
-#endif
/* Call the region join routine to acquire the region. */
memset(&tregion, 0, sizeof(tregion));
+ tregion.type = REGION_TYPE_ENV;
tregion.size = (roff_t)size;
tregion.max = (roff_t)max;
tregion.segid = segid;
@@ -257,15 +261,15 @@ user_map_functions:
"Program version %d.%d doesn't match environment version %d.%d",
"%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR,
renv->majver, renv->minver);
- ret = DB_VERSION_MISMATCH;
+ ret = USR_ERR(env, DB_VERSION_MISMATCH);
} else
- ret = EINVAL;
+ ret = USR_ERR(env, EINVAL);
goto err;
}
if (renv->signature != signature) {
__db_errx(env, DB_STR("1539",
"Build signature doesn't match environment"));
- ret = DB_VERSION_MISMATCH;
+ ret = USR_ERR(env, DB_VERSION_MISMATCH);
goto err;
}
@@ -287,8 +291,16 @@ user_map_functions:
ret = __env_panic_msg(env);
goto err;
}
- if (renv->magic != DB_REGION_MAGIC)
+ if (renv->magic != DB_REGION_MAGIC) {
+ DB_DEBUG_MSG(env,
+ "attach sees bad region magic 0x%lx", (u_long)renv->magic);
goto retry;
+ }
+
+ if (dbenv->blob_threshold != 0 &&
+ renv->blob_threshold != dbenv->blob_threshold)
+ __db_msg(env, DB_STR("1591",
+"Warning: Ignoring blob_threshold size when joining environment"));
/*
* Get a reference to the underlying REGION information for this
@@ -329,7 +341,7 @@ user_map_functions:
if (*init_flagsp != 0) {
__db_errx(env, DB_STR("1540",
"configured environment flags incompatible with existing environment"));
- ret = EINVAL;
+ ret = USR_ERR(env, EINVAL);
goto err;
}
*init_flagsp = renv->init_flags;
@@ -437,6 +449,8 @@ creation:
renv->minver = (u_int32_t)minver;
renv->patchver = (u_int32_t)patchver;
renv->signature = signature;
+ renv->failure_panic = 0;
+ renv->failure_symptom[0] = '\0';
(void)time(&renv->timestamp);
__os_unique_id(env, &renv->envid);
@@ -447,6 +461,8 @@ creation:
*/
renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp;
+ renv->blob_threshold = dbenv->blob_threshold;
+
/*
* Set up the region array. We use an array rather than a linked list
* as we have to traverse this list after failure in some cases, and
@@ -513,17 +529,14 @@ find_err: __db_errx(env, DB_STR_A("1544",
}
}
-#ifndef HAVE_MUTEX_FCNTL
/*
- * If we're not doing fcntl locking, we can close the file handle. We
- * no longer need it and the less contact between the buffer cache and
- * the VM, the better.
+ * We no longer need the file handle and the less contact between the
+ * buffer cache and the VM, the better.
*/
if (env->lockfhp != NULL) {
(void)__os_closehandle(env, env->lockfhp);
env->lockfhp = NULL;
}
-#endif
/* Everything looks good, we're done. */
env->reginfo = infop;
@@ -550,7 +563,7 @@ retry: /* Close any open file handle. */
(void)__env_sys_detach(env,
infop, F_ISSET(infop, REGION_CREATE));
- if (rp != NULL && F_ISSET(env, DB_PRIVATE))
+ if (rp != NULL && F_ISSET(env, ENV_PRIVATE))
__env_alloc_free(infop, rp);
}
@@ -674,8 +687,23 @@ __env_panic_set(env, on)
ENV *env;
int on;
{
- if (env != NULL && env->reginfo != NULL)
- ((REGENV *)env->reginfo->primary)->panic = on ? 1 : 0;
+ REGENV *renv;
+
+ if (env != NULL && env->reginfo != NULL) {
+ /*
+ * Remember it in the process' env as well, so that the
+ * panic-ness is still known on exit from the final close.
+ */
+ renv = env->reginfo->primary;
+ if (on) {
+ F_SET(env, ENV_REMEMBER_PANIC);
+ if (F_ISSET(env->dbenv, DB_ENV_FAILCHK))
+ renv->failure_panic = 1;
+ }
+ else
+ F_CLR(env, ENV_REMEMBER_PANIC);
+ renv->panic = on ? 1 : 0;
+ }
}
/*
@@ -775,6 +803,31 @@ __env_ref_get(dbenv, countp)
}
/*
+ * __env_region_cleanup --
+ * Detach from any regions, e.g., when closing after a panic.
+ *
+ * PUBLIC: int __env_region_cleanup __P((ENV *));
+ */
+int
+__env_region_cleanup(env)
+ ENV *env;
+{
+ if (env->reginfo != NULL) {
+#ifdef HAVE_MUTEX_SUPPORT
+ (void)__lock_region_detach(env, env->lk_handle);
+ (void)__mutex_region_detach(env, env->mutex_handle);
+#endif
+ (void)__log_region_detach(env, env->lg_handle);
+ (void)__memp_region_detach(env, env->mp_handle);
+ (void)__txn_region_detach(env, env->tx_handle);
+ (void)__env_detach(env, 0);
+ /* Remember the panic state after detaching. */
+ F_SET(env, ENV_REMEMBER_PANIC);
+ }
+ return (0);
+}
+
+/*
* __env_detach --
* Detach from the environment.
*
@@ -796,9 +849,7 @@ __env_detach(env, destroy)
/* Close the locking file handle. */
if (env->lockfhp != NULL) {
- if ((t_ret =
- __os_closehandle(env, env->lockfhp)) != 0 && ret == 0)
- ret = t_ret;
+ ret = __os_closehandle(env, env->lockfhp);
env->lockfhp = NULL;
}
@@ -1249,13 +1300,13 @@ __env_sys_attach(env, infop, rp)
__db_errx(env, DB_STR_A("1548",
"region size %lu is too large; maximum is %lu", "%lu %lu"),
(u_long)rp->size, (u_long)DB_REGIONSIZE_MAX);
- return (EINVAL);
+ return (USR_ERR(env, EINVAL));
}
if (rp->max > DB_REGIONSIZE_MAX) {
__db_errx(env, DB_STR_A("1549",
"region max %lu is too large; maximum is %lu", "%lu %lu"),
(u_long)rp->max, (u_long)DB_REGIONSIZE_MAX);
- return (EINVAL);
+ return (USR_ERR(env, EINVAL));
}
#endif
@@ -1281,7 +1332,7 @@ __env_sys_attach(env, infop, rp)
"architecture does not support locks inside process-local (malloc) memory"));
__db_errx(env, DB_STR("1551",
"application may not specify both DB_PRIVATE and DB_THREAD"));
- return (EINVAL);
+ return (USR_ERR(env, EINVAL));
}
#endif
if ((ret = __os_malloc(
@@ -1310,7 +1361,7 @@ __env_sys_attach(env, infop, rp)
"region memory was not correctly aligned"));
(void)__env_sys_detach(env, infop,
F_ISSET(infop, REGION_CREATE));
- return (EINVAL);
+ return (USR_ERR(env, EINVAL));
}
return (0);
@@ -1402,7 +1453,7 @@ __env_des_get(env, env_infop, infop, rpp)
* the region, fail. The caller generates any error message.
*/
if (!F_ISSET(infop, REGION_CREATE_OK))
- return (ENOENT);
+ return (USR_ERR(env, ENOENT));
/*
* If we didn't find a region and don't have room to create the region
@@ -1411,7 +1462,7 @@ __env_des_get(env, env_infop, infop, rpp)
if (empty_slot == NULL) {
__db_errx(env, DB_STR("1553",
"no room remaining for additional REGIONs"));
- return (ENOENT);
+ return (USR_ERR(env, ENOENT));
}
/*
diff --git a/src/env/env_register.c b/src/env/env_register.c
index 7475444d..731ddd1f 100644
--- a/src/env/env_register.c
+++ b/src/env/env_register.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2004, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2004, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -30,6 +30,7 @@
static int __envreg_add __P((ENV *, int *, u_int32_t));
static int __envreg_pid_compare __P((const void *, const void *));
static int __envreg_create_active_pid __P((ENV *, char *));
+static int __envreg_add_active_pid __P((ENV*, char *));
/*
* Support for portable, multi-process database environment locking, based on
@@ -137,7 +138,7 @@ __envreg_register(env, need_recoveryp, flags)
if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
__db_msg(env, DB_STR_A("1524",
- "%lu: register environment", "%lu"), (u_long)pid);
+ "%lu: register environment", "%lu"), (u_long)pid);
/* Build the path name and open the registry file. */
if ((ret = __db_appname(env,
@@ -176,7 +177,6 @@ __envreg_register(env, need_recoveryp, flags)
/* Register this process. */
if ((ret = __envreg_add(env, need_recoveryp, flags)) != 0)
goto err;
-
/*
* Release our exclusive lock if we don't need to run recovery. If
* we need to run recovery, ENV->open will call back into register
@@ -186,8 +186,7 @@ __envreg_register(env, need_recoveryp, flags)
goto err;
if (0) {
-err: *need_recoveryp = 0;
-
+err:
/*
* !!!
* Closing the file handle must release all of our locks.
@@ -196,7 +195,6 @@ err: *need_recoveryp = 0;
(void)__os_closehandle(env, dbenv->registry);
dbenv->registry = NULL;
}
-
if (pp != NULL)
__os_free(env, pp);
@@ -222,11 +220,11 @@ __envreg_add(env, need_recoveryp, flags)
size_t nr, nw;
u_int lcnt;
u_int32_t bytes, mbytes, orig_flags;
- int need_recovery, ret, t_ret;
+ int need_failchk, ret, t_ret;
char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10];
dbenv = env->dbenv;
- need_recovery = 0;
+ need_failchk = t_ret = 0;
COMPQUIET(dead, 0);
COMPQUIET(p, NULL);
ip = NULL;
@@ -269,7 +267,7 @@ kill_all: /*
* registering.
*/
if (nr != PID_LEN) {
- need_recovery = 1;
+ need_failchk = 1;
break;
}
@@ -299,7 +297,7 @@ kill_all: /*
}
#if DB_ENVREG_KILL_ALL
- if (need_recovery) {
+ if (need_failchk) {
pid = (pid_t)strtoul(buf, NULL, 10);
(void)kill(pid, SIGKILL);
@@ -318,7 +316,7 @@ kill_all: /*
__db_msg(env, DB_STR_A("1530",
"%02u: %s: FAILED", "%02u %s"), lcnt, p);
- need_recovery = 1;
+ need_failchk = 1;
dead = pos;
#if DB_ENVREG_KILL_ALL
goto kill_all;
@@ -331,16 +329,27 @@ kill_all: /*
"%02u: %s: LOCKED", "%02u %s"), lcnt, p);
}
+ /* Check for a panic; if so there's no need to call failchk. */
+ if (__env_attach(env, NULL, 0, 0) != 0)
+ goto sig_proc;
+ infop = env->reginfo;
+ renv = infop->primary;
+ *need_recoveryp = renv->panic != 0;
+ (void)__env_detach(env, 0);
+ if (*need_recoveryp)
+ return (0);
+
/*
- * If we have to perform recovery...
+ * If we have to perform failchk...
*
* Mark all slots empty. Registry ignores empty slots we can't lock,
* so it doesn't matter if any of the processes are in the middle of
* exiting Berkeley DB -- they'll discard their lock when they exit.
*/
- if (need_recovery) {
+ if (need_failchk) {
if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
- __db_msg(env, "%lu: recovery required", (u_long)pid);
+ __db_msg(env,
+ "%lu: failchk recovery required", (u_long)pid);
if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) {
if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
@@ -352,13 +361,14 @@ kill_all: /*
env, pid_buf)) != 0)
goto sig_proc;
- /* The environment will already exist, so we do not
+ /*
+ * The environment will already exist, so we do not
* want DB_CREATE set, nor do we want any recovery at
* this point. No need to put values back as flags is
* passed in by value. Save original dbenv flags in
* case we need to recover/remove existing environment.
* Set DB_ENV_FAILCHK before attach to help ensure we
- * dont block on a mutex held by the dead process.
+ * don't block on a mutex held by the dead process.
*/
LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL);
orig_flags = dbenv->flags;
@@ -367,44 +377,53 @@ kill_all: /*
if ((ret = __env_attach_regions(
dbenv, flags, orig_flags, 0)) != 0)
goto sig_proc;
- if ((t_ret =
- __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 &&
- ret == 0)
+ if ((t_ret = __env_set_state(env,
+ &ip, THREAD_FAILCHK)) != 0 && ret == 0)
ret = t_ret;
- if ((t_ret =
- __env_failchk_int(dbenv)) != 0 && ret == 0)
+ if (ret == 0 && (t_ret = __env_failchk_int(dbenv)) != 0)
ret = t_ret;
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env,
+ "%lu: failchk returned %d, ret is %d",
+ (u_long)pid, t_ret, ret);
/* Free active pid array if used. */
if (LF_ISSET(DB_FAILCHK_ISALIVE)) {
- DB_GLOBAL(num_active_pids) = 0;
- DB_GLOBAL(size_active_pids) = 0;
- __os_free( env, DB_GLOBAL(active_pids));
+ env->num_active_pids = 0;
+ env->size_active_pids = 0;
+ __os_free(env, env->active_pids);
+ env->active_pids = NULL;
}
/* Detach from environment and deregister thread. */
- if ((t_ret =
- __env_refresh(dbenv, orig_flags, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = __env_refresh(dbenv,
+ orig_flags, 0)) != 0 && ret == 0)
ret = t_ret;
+ F_CLR(env, ENV_OPEN_CALLED);
+
if (ret == 0) {
if ((ret = __os_seek(env, dbenv->registry,
- 0, 0,(u_int32_t)dead)) != 0 ||
+ 0, 0, (u_int32_t)dead)) != 0 ||
(ret = __os_write(env, dbenv->registry,
PID_EMPTY, PID_LEN, &nw)) != 0)
return (ret);
- need_recovery = 0;
+ need_failchk = 0;
goto add;
}
}
/* If we can't attach, then we cannot set DB_REGISTER panic. */
-sig_proc: if (__env_attach(env, NULL, 0, 0) == 0) {
+sig_proc:
+ if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER))
+ __db_msg(env, "%lu: sig_proc attaching errs %s/ret %s",
+ (u_long)pid, db_strerror(t_ret), db_strerror(ret));
+ if (__env_attach(env, NULL, 0, 0) == 0) {
infop = env->reginfo;
renv = infop->primary;
- /* Indicate DB_REGSITER panic. Also, set environment
- * panic as this is the panic trigger mechanism in
- * the code that everything looks for.
+ /*
+ * Indicate DB_REGISTER panic. Also, set (or re-set)
+ * environment panic as this is the panic trigger
+ * mechanism in the code that everything looks for.
*/
renv->reg_panic = 1;
renv->panic = 1;
@@ -484,7 +503,7 @@ add: if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0)
}
}
- if (need_recovery)
+ if (need_failchk)
*need_recoveryp = 1;
return (ret);
@@ -543,8 +562,9 @@ __envreg_unregister(env, recovery_failed)
* also releasing our slot lock, we could race. That can't happen, I
* don't think.
*/
-err: if ((t_ret =
- __os_closehandle(env, dbenv->registry)) != 0 && ret == 0)
+err:
+ if (dbenv->registry != NULL &&
+ (t_ret = __os_closehandle(env, dbenv->registry)) != 0 && ret == 0)
ret = t_ret;
dbenv->registry = NULL;
@@ -610,6 +630,10 @@ __envreg_isalive(dbenv, pid, tid, flags )
db_threadid_t tid;
u_int32_t flags;
{
+ ENV *env;
+
+ env = dbenv->env;
+
/* in this case we really do not care about tid, simply for lint */
DB_THREADID_INIT(tid);
@@ -617,15 +641,14 @@ __envreg_isalive(dbenv, pid, tid, flags )
if (!((flags == 0) || (flags == DB_MUTEX_PROCESS_ONLY)))
return (EINVAL);
- if (DB_GLOBAL(active_pids) == NULL ||
- DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL)
+ if (env->active_pids == NULL || env->num_active_pids == 0)
return (0);
/*
* bsearch returns a pointer to an entry in active_pids if a match
* is found on pid, else no match found it returns NULL. This
* routine will return a 1 if a match is found, else a 0.
*/
- if (bsearch(&pid, DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+ if (bsearch(&pid, env->active_pids, env->num_active_pids,
sizeof(pid_t), __envreg_pid_compare))
return 1;
@@ -635,7 +658,8 @@ __envreg_isalive(dbenv, pid, tid, flags )
/*
* __envreg_create_active_pid --
* Create array of pids, if need more room in array then double size.
- * Only add active pids from DB_REGISTER file into array.
+ * Only add active pids from DB_REGISTER file into array. The given
+ * active my_pid is also added into array.
*/
static int
__envreg_create_active_pid(env, my_pid)
@@ -646,8 +670,7 @@ __envreg_create_active_pid(env, my_pid)
char buf[PID_LEN + 10];
int ret;
off_t pos;
- pid_t pid, *tmparray;
- size_t tmpsize, nr;
+ size_t nr;
u_int lcnt;
dbenv = env->dbenv;
@@ -655,6 +678,15 @@ __envreg_create_active_pid(env, my_pid)
ret = 0;
/*
+ * The process getting here has not been added to the DB_REGISTER
+ * file yet, so include it as the first item in array
+ */
+ if (env->num_active_pids == 0) {
+ if ((ret = __envreg_add_active_pid(env, my_pid)) != 0)
+ return (ret);
+ }
+
+ /*
* Walk through DB_REGISTER file, we grab pid entries that are locked
* as those represent processes that are still alive. Ignore empty
* slots, or those that are unlocked.
@@ -678,53 +710,50 @@ __envreg_create_active_pid(env, my_pid)
if ((ret = REGISTRY_UNLOCK(env, pos)) != 0)
return (ret);
} else {
- /* first, check to make sure we have room in arrary */
- if (DB_GLOBAL(num_active_pids) + 1 >
- DB_GLOBAL(size_active_pids)) {
- tmpsize =
- DB_GLOBAL(size_active_pids) * sizeof(pid_t);
-
- /* start with 512, then double if must grow */
- tmpsize = tmpsize>0 ? tmpsize*2 : 512;
- if ((ret = __os_malloc
- (env, tmpsize, &tmparray )) != 0)
- return (ret);
-
- /* if array exists, then copy and free */
- if (DB_GLOBAL(active_pids)) {
- memcpy( tmparray,
- DB_GLOBAL(active_pids),
- DB_GLOBAL(num_active_pids) *
- sizeof(pid_t));
- __os_free( env, DB_GLOBAL(active_pids));
- }
-
- DB_GLOBAL(active_pids) = tmparray;
- DB_GLOBAL(size_active_pids) = tmpsize;
-
- /*
- * The process getting here has not been added
- * to the DB_REGISTER file yet, so include it
- * as the first item in array
- */
- if (DB_GLOBAL(num_active_pids) == 0) {
- pid = (pid_t)strtoul(my_pid, NULL, 10);
- DB_GLOBAL(active_pids)
- [DB_GLOBAL(num_active_pids)++] = pid;
- }
- }
-
- /* insert into array */
- pid = (pid_t)strtoul(buf, NULL, 10);
- DB_GLOBAL(active_pids)
- [DB_GLOBAL(num_active_pids)++] = pid;
-
+ if ((ret = __envreg_add_active_pid(env, buf)) != 0)
+ return (ret);
}
}
/* lets sort the array to allow for binary search in isalive func */
- qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids),
+ qsort(env->active_pids, env->num_active_pids,
sizeof(pid_t), __envreg_pid_compare);
return (ret);
}
+
+/*
+ * __envreg_add_active_pid --
+ * Append the pid given as a decimal string to env->active_pids, growing it if full.
+ */
+static int
+__envreg_add_active_pid(env, pid)
+ ENV *env;
+ char *pid;
+{
+ int ret;
+ size_t tmpsize;
+
+ ret = 0;
+
+ /* First, make sure the array has room for one more entry. */
+ if (env->num_active_pids + 1 >
+ env->size_active_pids) {
+ tmpsize =
+ env->size_active_pids * sizeof(pid_t);
+
+ /* tmpsize is in bytes: start with 512, then double on each growth. */
+ tmpsize = tmpsize > 0 ? tmpsize * 2 : 512;
+ if ((ret = __os_realloc
+ (env, tmpsize, &(env->active_pids) )) != 0)
+ return (ret);
+
+ env->size_active_pids = tmpsize / sizeof(pid_t);
+ }
+
+ /* Convert the base-10 pid string and store it in the next free slot. */
+ env->active_pids
+ [env->num_active_pids++] = (pid_t)strtoul(pid, NULL, 10);
+
+ return (0);
+}
diff --git a/src/env/env_sig.c b/src/env/env_sig.c
index 6d127f85..57e64228 100644
--- a/src/env/env_sig.c
+++ b/src/env/env_sig.c
@@ -28,9 +28,9 @@
* shared memory.
*/
#ifdef HAVE_MIXED_SIZE_ADDRESSING
-#define __STRUCTURE_COUNT 41
+#define __STRUCTURE_COUNT 48
#else
-#define __STRUCTURE_COUNT (41 + 104)
+#define __STRUCTURE_COUNT (48 + 108)
#endif
/*
@@ -66,7 +66,11 @@ __env_struct_sig()
__ADD(__db_h_stat);
__ADD(__db_heap_stat);
__ADD(__db_qam_stat);
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__mutex_state);
+#endif
__ADD(__db_thread_info);
+ __ADD(__env_thread_info);
__ADD(__db_lockregion);
__ADD(__sh_dbt);
__ADD(__db_lockobj);
@@ -82,6 +86,9 @@ __env_struct_sig()
__ADD(__db_mutexregion);
#endif
#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__mutex_history);
+#endif
+#ifdef HAVE_MUTEX_SUPPORT
__ADD(__db_mutex_t);
#endif
__ADD(__db_reg_env);
@@ -92,6 +99,10 @@ __env_struct_sig()
#ifndef HAVE_MIXED_SIZE_ADDRESSING
__ADD(__db_dbt);
+#ifdef HAVE_MUTEX_SUPPORT
+ __ADD(__db_event_mutex_died_info);
+#endif
+ __ADD(__db_event_failchk_info);
__ADD(__db_lockreq);
__ADD(__db_log_cursor);
__ADD(__log_rec_spec);
@@ -113,6 +124,7 @@ __env_struct_sig()
__ADD(__cq_fq);
__ADD(__cq_aq);
__ADD(__cq_jq);
+ __ADD(__db_stream);
__ADD(__db_heap_rid);
__ADD(__dbc);
__ADD(__key_range);
@@ -125,7 +137,6 @@ __env_struct_sig()
__ADD(__fn);
__ADD(__db_msgbuf);
__ADD(__pin_list);
- __ADD(__env_thread_info);
__ADD(__flag_map);
__ADD(__db_backup_handle);
__ADD(__env);
diff --git a/src/env/env_stat.c b/src/env/env_stat.c
index 9bc3fe7e..094d0545 100644
--- a/src/env/env_stat.c
+++ b/src/env/env_stat.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -21,11 +21,9 @@ static int __env_print_dbenv_all __P((ENV *, u_int32_t));
static int __env_print_env_all __P((ENV *, u_int32_t));
static int __env_print_fh __P((ENV *));
static int __env_print_stats __P((ENV *, u_int32_t));
-static int __env_print_thread __P((ENV *));
static int __env_stat_print __P((ENV *, u_int32_t));
static char *__env_thread_state_print __P((DB_THREAD_STATE));
-static const char *
- __reg_type __P((reg_type_t));
+static const char * __reg_type __P((reg_type_t));
/*
* __env_stat_print_pp --
@@ -146,7 +144,6 @@ __env_stat_print(env, flags)
/*
* __env_print_stats --
* Display the default environment statistics.
- *
*/
static int
__env_print_stats(env, flags)
@@ -186,6 +183,10 @@ __env_print_stats(env, flags)
(u_long)0, (u_long)0, (u_long)infop->rp->size);
__db_dlbytes(env, "Maximum region size",
(u_long)0, (u_long)0, (u_long)infop->rp->max);
+ STAT_LONG("Process failure detected", renv->failure_panic);
+ if (renv->failure_symptom[0] != '\0')
+ __db_msg(env,
+ "%s:\tFirst failure symptom", renv->failure_symptom);
return (0);
}
@@ -267,8 +268,6 @@ __env_print_dbenv_all(env, flags)
__db_msg(env, "%s", DB_GLOBAL(db_line));
STAT_POINTER("ENV", dbenv->env);
- __mutex_print_debug_single(
- env, "DB_ENV handle mutex", dbenv->mtx_db_env, flags);
STAT_ISSET("Errcall", dbenv->db_errcall);
STAT_ISSET("Errfile", dbenv->db_errfile);
STAT_STRING("Errpfx", dbenv->db_errpfx);
@@ -286,6 +285,7 @@ __env_print_dbenv_all(env, flags)
STAT_ISSET("ThreadId", dbenv->thread_id);
STAT_ISSET("ThreadIdString", dbenv->thread_id_string);
+ STAT_STRING("Blob dir", dbenv->db_blob_dir);
STAT_STRING("Log dir", dbenv->db_log_dir);
STAT_STRING("Metadata dir", dbenv->db_md_dir);
STAT_STRING("Tmp dir", dbenv->db_tmp_dir);
@@ -304,6 +304,8 @@ __env_print_dbenv_all(env, flags)
STAT_ISSET("Password", dbenv->passwd);
+ STAT_ULONG("Blob threshold", dbenv->blob_threshold);
+
STAT_ISSET("App private", dbenv->app_private);
STAT_ISSET("Api1 internal", dbenv->api1_internal);
STAT_ISSET("Api2 internal", dbenv->api2_internal);
@@ -314,6 +316,7 @@ __env_print_dbenv_all(env, flags)
STAT_ULONG("Mutex cnt", dbenv->mutex_cnt);
STAT_ULONG("Mutex inc", dbenv->mutex_inc);
STAT_ULONG("Mutex tas spins", dbenv->mutex_tas_spins);
+ STAT_LONG("Mutex failchk timeout", dbenv->mutex_failchk_timeout);
STAT_ISSET("Lock conflicts", dbenv->lk_conflicts);
STAT_LONG("Lock modes", dbenv->lk_modes);
@@ -356,6 +359,7 @@ __env_print_dbenv_all(env, flags)
__db_prflags(env,
NULL, dbenv->flags, db_env_fn, NULL, "\tPublic environment flags");
+ COMPQUIET(flags, 0);
return (0);
}
@@ -507,6 +511,8 @@ __env_thread_state_print(state)
return ("blocked and dead");
case THREAD_OUT:
return ("out");
+ case THREAD_VERIFY:
+ return ("verify");
default:
return ("unknown");
}
@@ -516,14 +522,17 @@ __env_thread_state_print(state)
/*
* __env_print_thread --
* Display the thread block state.
+ *
+ * PUBLIC: int __env_print_thread __P((ENV *));
*/
-static int
+int
__env_print_thread(env)
ENV *env;
{
BH *bhp;
DB_ENV *dbenv;
DB_HASHTAB *htab;
+ DB_LOCKER *locker;
DB_MPOOL *dbmp;
DB_THREAD_INFO *ip;
PIN_LIST *list, *lp;
@@ -532,6 +541,7 @@ __env_print_thread(env)
THREAD_INFO *thread;
u_int32_t i;
char buf[DB_THREADID_STRLEN];
+ char time_buf[CTIME_BUFLEN];
dbenv = env->dbenv;
@@ -561,6 +571,10 @@ __env_print_thread(env)
dbenv->thread_id_string(
dbenv, ip->dbth_pid, ip->dbth_tid, buf),
__env_thread_state_print(ip->dbth_state));
+ if (timespecisset(&ip->dbth_failtime))
+ __db_msg(env, "Crashed at %s",
+ __db_ctimespec(&ip->dbth_failtime,
+ time_buf));
list = R_ADDR(env->reginfo, ip->dbth_pinlist);
for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) {
if (lp->b_ref == INVALID_ROFF)
@@ -570,6 +584,18 @@ __env_print_thread(env)
__db_msg(env,
"\t\tpins: %lu", (u_long)bhp->pgno);
}
+ if (ip->dbth_local_locker != INVALID_ROFF) {
+ locker = (DB_LOCKER *)
+ R_ADDR(&env->lk_handle->reginfo,
+ ip->dbth_local_locker);
+ __db_msg(env, "\t\tcached locker %lx mtx %lu",
+ (u_long)locker->id,
+ (u_long)locker->mtx_locker);
+
+ }
+#ifdef HAVE_MUTEX_SUPPORT
+ (void)__mutex_record_print(env, ip);
+#endif
}
return (0);
}
@@ -846,6 +872,7 @@ __reg_type(t)
return ("Transaction");
case INVALID_REGION_TYPE:
return ("Invalid");
+ /*lint -e{787} */
}
return ("Unknown");
}