diff options
Diffstat (limited to 'src/env')
-rw-r--r-- | src/env/env_alloc.c | 2 | ||||
-rw-r--r-- | src/env/env_backup.c | 2 | ||||
-rw-r--r-- | src/env/env_config.c | 38 | ||||
-rw-r--r-- | src/env/env_failchk.c | 141 | ||||
-rw-r--r-- | src/env/env_file.c | 16 | ||||
-rw-r--r-- | src/env/env_globals.c | 19 | ||||
-rw-r--r-- | src/env/env_method.c | 187 | ||||
-rw-r--r-- | src/env/env_name.c | 20 | ||||
-rw-r--r-- | src/env/env_open.c | 250 | ||||
-rw-r--r-- | src/env/env_recover.c | 130 | ||||
-rw-r--r-- | src/env/env_region.c | 117 | ||||
-rw-r--r-- | src/env/env_register.c | 197 | ||||
-rw-r--r-- | src/env/env_sig.c | 17 | ||||
-rw-r--r-- | src/env/env_stat.c | 43 |
14 files changed, 892 insertions, 287 deletions
diff --git a/src/env/env_alloc.c b/src/env/env_alloc.c index 700bfb27..9c8fd046 100644 --- a/src/env/env_alloc.c +++ b/src/env/env_alloc.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/env/env_backup.c b/src/env/env_backup.c index 9c79dbb4..2940f44b 100644 --- a/src/env/env_backup.c +++ b/src/env/env_backup.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ diff --git a/src/env/env_config.c b/src/env/env_config.c index 57496909..56cebb63 100644 --- a/src/env/env_config.c +++ b/src/env/env_config.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. 
* * $Id$ */ @@ -84,8 +84,10 @@ static const CFG_DESC config_descs[] = { { "rep_set_clockskew", CFG_2UINT, __rep_set_clockskew }, { "rep_set_limit", CFG_2UINT, __rep_set_limit }, { "rep_set_nsites", CFG_UINT, __rep_set_nsites_pp }, - { "rep_set_priority", CFG_UINT, __rep_set_priority }, + { "rep_set_priority", CFG_UINT, __rep_set_priority_pp }, { "rep_set_request", CFG_2UINT, __rep_set_request }, + { "set_blob_dir", CFG_STRING, __env_set_blob_dir }, + { "set_blob_threshold", CFG_2UINT, __env_set_blob_threshold }, { "set_cache_max", CFG_2UINT, __memp_set_cache_max }, { "set_create_dir", CFG_STRING, __env_set_create_dir }, { "set_data_dir", CFG_STRING, __env_set_data_dir }, @@ -133,11 +135,16 @@ static const FN config_rep_config[] = { { DB_REP_CONF_AUTOROLLBACK, "db_rep_conf_autorollback" }, { DB_REP_CONF_BULK, "db_rep_conf_bulk" }, { DB_REP_CONF_DELAYCLIENT, "db_rep_conf_delayclient" }, + { DB_REP_CONF_ELECT_LOGLENGTH, "db_rep_conf_elect_loglength" }, { DB_REP_CONF_INMEM, "db_rep_conf_inmem" }, { DB_REP_CONF_LEASE, "db_rep_conf_lease" }, { DB_REP_CONF_NOWAIT, "db_rep_conf_nowait" }, { DB_REPMGR_CONF_2SITE_STRICT, "db_repmgr_conf_2site_strict" }, { DB_REPMGR_CONF_ELECTIONS, "db_repmgr_conf_elections" }, + { DB_REPMGR_CONF_PREFMAS_CLIENT, + "db_repmgr_conf_prefmas_client" }, + { DB_REPMGR_CONF_PREFMAS_MASTER, + "db_repmgr_conf_prefmas_master" }, { 0, NULL } }; @@ -198,7 +205,9 @@ static const FN config_set_flags_forlog[] = { { DB_LOG_DIRECT, "db_direct_log" }, { DB_LOG_DSYNC, "db_dsync_log" }, { DB_LOG_AUTO_REMOVE, "db_log_autoremove" }, + { DB_LOG_BLOB, "db_log_blob" }, { DB_LOG_IN_MEMORY, "db_log_inmemory" }, + { DB_LOG_NOSYNC, "db_log_nosync" }, { 0, NULL } }; @@ -206,7 +215,9 @@ static const FN config_log_set_config[] = { { DB_LOG_DIRECT, "db_log_direct" }, { DB_LOG_DSYNC, "db_log_dsync" }, { DB_LOG_AUTO_REMOVE, "db_log_auto_remove" }, + { DB_LOG_BLOB, "db_log_blob" }, { DB_LOG_IN_MEMORY, "db_log_in_memory" }, + { DB_LOG_NOSYNC, "db_log_nosync" }, { DB_LOG_ZERO, 
"db_log_zero" }, { 0, NULL } }; @@ -237,6 +248,7 @@ static const FN config_set_verbose[] = { { DB_VERB_DEADLOCK, "db_verb_deadlock" }, { DB_VERB_FILEOPS, "db_verb_fileops" }, { DB_VERB_FILEOPS_ALL, "db_verb_fileops_all" }, + { DB_VERB_MVCC, "db_verb_mvcc" }, { DB_VERB_RECOVERY, "db_verb_recovery" }, { DB_VERB_REGISTER, "db_verb_register" }, { DB_VERB_REPLICATION, "db_verb_replication" }, @@ -462,7 +474,7 @@ format: __db_errx(env, DB_STR_A("1584", if ((lv1 = __db_name_to_val(config_rep_timeout, argv[1])) == -1) goto format; CFG_GET_UINT32(argv[2], &uv2); - return (__rep_set_timeout(dbenv, lv1, (db_timeout_t)uv2)); + return (__rep_set_timeout_pp(dbenv, lv1, (db_timeout_t)uv2)); } /* repmgr_set_ack_policy db_repmgr_acks_XXX */ @@ -475,6 +487,15 @@ format: __db_errx(env, DB_STR_A("1584", return (__repmgr_set_ack_policy(dbenv, lv1)); } + if (strcasecmp(argv[0], "repmgr_set_incoming_queue_max") == 0) { + if (nf != 3) + goto format; + CFG_GET_UINT32(argv[1], &uv1); + CFG_GET_UINT32(argv[2], &uv2); + return (__repmgr_set_incoming_queue_max( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2)); + } + /* * Configure name/value pairs of config information for a site (local or * remote). @@ -503,7 +524,7 @@ format: __db_errx(env, DB_STR_A("1584", uv2 = 0; else CFG_GET_UINT32(argv[i + 1], &uv2); - if ((ret = __repmgr_site_config(site, + if ((ret = __repmgr_site_config_int(site, (u_int32_t)lv1, (u_int32_t)uv2)) != 0) break; } @@ -630,6 +651,15 @@ format: __db_errx(env, DB_STR_A("1584", dbenv, DB_REGION_INIT, lv1 == 0 ? 
0 : 1)); } + /* set_mutex_failchk_timeout <unsigned timeout> */ + if (strcasecmp(argv[0], "set_mutex_failchk_timeout") == 0) { + if (nf != 2) + goto format; + CFG_GET_UINT32(argv[1], &uv1); + return (__env_set_timeout( + dbenv, (u_int32_t)uv1, DB_SET_MUTEX_FAILCHK_TIMEOUT)); + } + /* set_reg_timeout <unsigned timeout> */ if (strcasecmp(argv[0], "set_reg_timeout") == 0) { if (nf != 2) diff --git a/src/env/env_failchk.c b/src/env/env_failchk.c index 05752f07..ad9bed0b 100644 --- a/src/env/env_failchk.c +++ b/src/env/env_failchk.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2005, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -22,9 +22,26 @@ static int __env_in_api __P((ENV *)); static void __env_clear_state __P((ENV *)); /* + * When failchk broadcast is enabled continue after the first error, to try to + * find all of them; without broadcasting stop at the first failure. + */ +#ifdef HAVE_FAILCHK_BROADCAST +#define FAILCHK_PROCESS_ERROR(t_ret, ret) \ + if ((t_ret) != 0 && (ret) == 0) (ret) = (t_ret) +#else +#define FAILCHK_PROCESS_ERROR(t_ret, ret) \ + if (((ret) = (t_ret)) != 0) goto err +#endif + +/* * __env_failchk_pp -- * ENV->failchk pre/post processing. * + * Single process failchk continues after recoverable failures but stops as + * soon as recovery is required. Broadcast failchks continue even after + * DB_RUNRECOVERY failures are detected, to maximize the possibility to + * wake up processes blocked on dead resources, e.g. mutexes. 
+ * * PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t)); */ int @@ -46,7 +63,7 @@ __env_failchk_pp(dbenv, flags) */ if (!ALIVE_ON(env)) { __db_errx(env, DB_STR("1503", - "DB_ENV->failchk requires DB_ENV->is_alive be configured")); + "DB_ENV->failchk requires DB_ENV->is_alive be configured")); return (EINVAL); } @@ -59,10 +76,14 @@ __env_failchk_pp(dbenv, flags) ENV_LEAVE(env, ip); return (ret); } + /* * __env_failchk_int -- * Process the subsystem failchk routines * + * The FAILCHK_PROCESS_ERROR macro (defined at the top of this file) + * differs between the broadcast and single process versions of failchk. + * * PUBLIC: int __env_failchk_int __P((DB_ENV *)); */ int @@ -70,42 +91,52 @@ __env_failchk_int(dbenv) DB_ENV *dbenv; { ENV *env; - int ret; + int ret, t_ret; env = dbenv->env; + ret = 0; F_SET(dbenv, DB_ENV_FAILCHK); /* - * We check for dead threads in the API first as this would be likely - * to hang other things we try later, like locks and transactions. + * We check for dead threads in the API first as this would likely + * hang other things we try later, like locks and transactions. 
*/ - if ((ret = __env_in_api(env)) != 0) + if ((ret = __env_in_api(env)) != 0) { + __db_err(env, ret, "__env_in_api"); goto err; + } - if (LOCKING_ON(env) && (ret = __lock_failchk(env)) != 0) - goto err; + if (LOCKING_ON(env) && (t_ret = __lock_failchk(env)) != 0) + FAILCHK_PROCESS_ERROR(t_ret, ret); - if (TXN_ON(env) && - ((ret = __txn_failchk(env)) != 0 || - (ret = __dbreg_failchk(env)) != 0)) - goto err; + if (TXN_ON(env) && ret == 0 && ((t_ret = __txn_failchk(env)) != 0 || + (t_ret = __dbreg_failchk(env)) != 0)) + FAILCHK_PROCESS_ERROR(t_ret, ret); - if ((ret = __memp_failchk(env)) != 0) - goto err; + if ((t_ret = __memp_failchk(env)) != 0) + FAILCHK_PROCESS_ERROR(t_ret, ret); #ifdef HAVE_REPLICATION_THREADS - if (REP_ON(env) && (ret = __repmgr_failchk(env)) != 0) - goto err; + if (REP_ON(env) && (t_ret = __repmgr_failchk(env)) != 0) + FAILCHK_PROCESS_ERROR(t_ret, ret); #endif - /* Mark any dead blocked threads as dead. */ - __env_clear_state(env); +err: #ifdef HAVE_MUTEX_SUPPORT - ret = __mut_failchk(env); + if ((t_ret = __mutex_failchk(env)) != 0 && ret == 0) + ret = t_ret; #endif -err: F_CLR(dbenv, DB_ENV_FAILCHK); + /* Any dead blocked thread slots are no longer needed; allow reuse. */ + if (ret == 0) + __env_clear_state(env); + if (ret == DB_RUNRECOVERY) { + /* Announce a panic; avoid __env_panic()'s diag core dump. 
*/ + __env_panic_set(env, 1); + __env_panic_event(env, ret); + } + F_CLR(dbenv, DB_ENV_FAILCHK); return (ret); } @@ -312,7 +343,8 @@ __env_in_api(env) REGINFO *infop; THREAD_INFO *thread; u_int32_t i; - int unpin, ret; + pid_t pid; + int unpin, ret, t_ret; if ((htab = env->thr_hashtab) == NULL) return (EINVAL); @@ -322,10 +354,13 @@ __env_in_api(env) renv = infop->primary; thread = R_ADDR(infop, renv->thread_off); unpin = 0; + ret = 0; for (i = 0; i < env->thr_nbucket; i++) SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) { + pid = ip->dbth_pid; if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE || + ip->dbth_state == THREAD_BLOCKED_DEAD || (ip->dbth_state == THREAD_OUT && thread->thr_count < thread->thr_max)) continue; @@ -341,26 +376,63 @@ __env_in_api(env) ip->dbth_state = THREAD_SLOT_NOT_IN_USE; continue; } - return (__db_failed(env, DB_STR("1507", + /* + * The above tests are not atomic, so it is possible that + * the process pointed by ip has changed during the tests. + * In particular, if the process pointed by ip when is_alive + * was executed terminated normally, a new process may reuse + * the same ip structure and change its dbth_state before the + * next two tests were performed. Therefore, we need to test + * here that all four tests above are done on the same process. + * If the process pointed by ip changed, all tests are invalid + * and can be ignored. + * Similarly, it's also possible for two processes racing to + * change the dbth_state of the same ip structure. For example, + * both process A and B reach the above test for the same + * terminated process C where C's dbth_state is THREAD_OUT. + * If A goes into the 'if' block and changes C's dbth_state to + * THREAD_SLOT_NOT_IN_USE before B checks the condition, B + * would incorrectly fail the test and run into this line. + * Therefore, we need to check C's dbth_state again and fail + * the db only if C's dbth_state is indeed THREAD_ACTIVE. 
+ */ + if (ip->dbth_state != THREAD_ACTIVE || ip->dbth_pid != pid) + continue; + __os_gettime(env, &ip->dbth_failtime, 0); + t_ret = __db_failed(env, DB_STR("1507", "Thread died in Berkeley DB library"), - ip->dbth_pid, ip->dbth_tid)); + ip->dbth_pid, ip->dbth_tid); + if (ret == 0) + ret = t_ret; + /* + * Classic failchk stop after one dead thread in the + * api, but broadcasting looks for all. + */ +#ifndef HAVE_FAILCHK_BROADCAST + return (ret); +#endif } if (unpin == 0) - return (0); + return (ret); for (i = 0; i < env->thr_nbucket; i++) SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) if (ip->dbth_state == THREAD_BLOCKED_DEAD && - (ret = __memp_unpin_buffers(env, ip)) != 0) + (t_ret = __memp_unpin_buffers(env, ip)) != 0) { + if (ret == 0) + ret = t_ret; +#ifndef HAVE_FAILCHK_BROADCAST return (ret); +#endif + } - return (0); + return (ret); } /* * __env_clear_state -- - * Look for threads which died while blockedi and clear them.. + * Look for threads which died while blocked and clear them.. */ static void __env_clear_state(env) @@ -441,6 +513,9 @@ __env_set_state(env, ipp, state) #endif } + /* A failchk thread must not block on a lock -- that would be faulty. */ + if (state == THREAD_BLOCKED && ip != NULL) + DB_ASSERT(env, ip->dbth_state != THREAD_FAILCHK); /* * If ipp is not null, return the thread control block if found. * Check to ensure the thread of control has been registered. 
@@ -457,7 +532,9 @@ __env_set_state(env, ipp, state) *ipp = NULL; ret = 0; - if (ip == NULL) { + if (ip != NULL) + ip->dbth_state = state; + else { infop = env->reginfo; renv = infop->primary; thread = R_ADDR(infop, renv->thread_off); @@ -503,11 +580,13 @@ __env_set_state(env, ipp, state) init: ip->dbth_pid = id.pid; ip->dbth_tid = id.tid; ip->dbth_state = state; + for (indx = 0; indx != MUTEX_STATE_MAX; indx++) + ip->dbth_latches[indx].mutex = MUTEX_INVALID; SH_TAILQ_INIT(&ip->dbth_xatxn); } MUTEX_UNLOCK(env, renv->mtx_regenv); - } else - ip->dbth_state = state; + } + *ipp = ip; DB_ASSERT(env, ret == 0); @@ -535,7 +614,7 @@ __env_thread_id_string(dbenv, pid, tid, buf) #ifdef UINT64_FMT char fmt[20]; - snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT); + snprintf(fmt, sizeof(fmt), "%s/%s", INT64_FMT, UINT64_FMT); snprintf(buf, DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid); #else diff --git a/src/env/env_file.c b/src/env/env_file.c index b102404d..d6e29b21 100644 --- a/src/env/env_file.c +++ b/src/env/env_file.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2002, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -12,7 +12,7 @@ /* * __db_file_extend -- - * Initialize a regular file by writing the last page of the file. + * Initialize or extend a regular file by writing to its last page. * * PUBLIC: int __db_file_extend __P((ENV *, DB_FH *, size_t)); */ @@ -27,7 +27,19 @@ __db_file_extend(env, fhp, size) u_int32_t relative; int ret; char buf; +#ifdef HAVE_MMAP_EXTEND + unsigned pagesize; + /* + * Round up size to the VM pagesize. If it isn't aligned, then the bytes + * ending the mapping might have no corresponding backing location on + * disk, and could be silently lost when the process exits. 
[#23290] + */ + if (F_ISSET(fhp, DB_FH_REGION)) { + pagesize = (unsigned)getpagesize(); + size = DB_ALIGN(size, pagesize); + } +#endif buf = '\0'; /* * Extend the file by writing the last page. If the region is >4Gb, diff --git a/src/env/env_globals.c b/src/env/env_globals.c index 955e6738..2d665661 100644 --- a/src/env/env_globals.c +++ b/src/env/env_globals.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -31,14 +31,21 @@ DB_GLOBALS __db_global_values = { "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=", /* db_line */ { 0 }, /* error_buf */ - 0, /* uid_init */ - 0, /* rand_next */ + 0, /* random_seeded */ +#if defined(HAVE_RANDOM_R) + { 0 }, /* random_r random_data */ + { 0 }, /* random_r state */ +#elif !defined(HAVE_RAND) && !defined(HAVE_RANDOM) + 0, /* rand/srand value */ +#endif 0, /* fid_serial */ 0, /* db_errno */ - 0, /* num_active_pids */ - 0, /* size_active_pids */ - NULL, /* active_pids */ NULL, /* saved_errstr */ + "%m/%d %H:%M:%S", /* strftime format for dates */ +#if defined(HAVE_ERROR_HISTORY) + 0, /* thread local msgs_key */ + PTHREAD_ONCE_INIT, /* pthread_once initializer */ +#endif NULL, /* j_assert */ NULL, /* j_close */ NULL, /* j_dirfree */ diff --git a/src/env/env_method.c b/src/env/env_method.c index 63deacea..c246febc 100644 --- a/src/env/env_method.c +++ b/src/env/env_method.c @@ -1,9 +1,9 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015 Oracle and/or its affiliates. All rights reserved. 
* - * $Id: env_method.c,v dabaaeb7d839 2010/08/03 17:28:53 mike $ + * $Id$ */ #include "db_config.h" @@ -40,6 +40,7 @@ static int __env_get_app_dispatch __P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops))); static int __env_set_app_dispatch __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); +static int __env_get_blob_dir __P((DB_ENV *, const char **)); static int __env_set_event_notify __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *))); static int __env_get_feedback __P((DB_ENV *, void (**)(DB_ENV *, int, int))); @@ -81,6 +82,11 @@ db_env_create(dbenvpp, flags) if (flags != 0) return (EINVAL); +#ifdef HAVE_ERROR_HISTORY + /* Call thread local storage initializer at least once per process. */ + __db_thread_init(); +#endif + /* Allocate the DB_ENV and ENV structures -- we always have both. */ if ((ret = __os_calloc(NULL, 1, sizeof(DB_ENV), &dbenv)) != 0) return (ret); @@ -159,7 +165,7 @@ __db_env_init(dbenv) */ /* DB_ENV PUBLIC HANDLE LIST BEGIN */ dbenv->add_data_dir = __env_add_data_dir; - dbenv->backup = __db_backup; + dbenv->backup = __db_backup_pp; dbenv->dbbackup = __db_dbbackup_pp; dbenv->cdsgroup_begin = __cdsgroup_begin_pp; dbenv->close = __env_close_pp; @@ -175,6 +181,8 @@ __db_env_init(dbenv) dbenv->get_cachesize = __memp_get_cachesize; dbenv->get_backup_callbacks = __env_get_backup_callbacks; dbenv->get_backup_config = __env_get_backup_config; + dbenv->get_blob_dir = __env_get_blob_dir; + dbenv->get_blob_threshold = __env_get_blob_threshold_pp; dbenv->get_create_dir = __env_get_create_dir; dbenv->get_data_dirs = __env_get_data_dirs; dbenv->get_data_len = __env_get_data_len; @@ -269,7 +277,7 @@ __db_env_init(dbenv) dbenv->open = __env_open_pp; dbenv->remove = __env_remove; dbenv->rep_elect = __rep_elect_pp; - dbenv->rep_flush = __rep_flush; + dbenv->rep_flush = __rep_flush_pp; dbenv->rep_get_clockskew = __rep_get_clockskew; dbenv->rep_get_config = __rep_get_config; dbenv->rep_get_limit = __rep_get_limit; @@ -282,29 +290,34 @@ 
__db_env_init(dbenv) dbenv->rep_set_config = __rep_set_config; dbenv->rep_set_limit = __rep_set_limit; dbenv->rep_set_nsites = __rep_set_nsites_pp; - dbenv->rep_set_priority = __rep_set_priority; + dbenv->rep_set_priority = __rep_set_priority_pp; dbenv->rep_set_request = __rep_set_request; - dbenv->rep_set_timeout = __rep_set_timeout; + dbenv->rep_set_timeout = __rep_set_timeout_pp; dbenv->rep_set_transport = __rep_set_transport_pp; + dbenv->rep_set_view = __rep_set_view; dbenv->rep_start = __rep_start_pp; dbenv->rep_stat = __rep_stat_pp; dbenv->rep_stat_print = __rep_stat_print_pp; dbenv->rep_sync = __rep_sync; dbenv->repmgr_channel = __repmgr_channel; dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy; + dbenv->repmgr_get_incoming_queue_max = __repmgr_get_incoming_queue_max; dbenv->repmgr_local_site = __repmgr_local_site; dbenv->repmgr_msg_dispatch = __repmgr_set_msg_dispatch; dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy; + dbenv->repmgr_set_incoming_queue_max = __repmgr_set_incoming_queue_max; dbenv->repmgr_site = __repmgr_site; dbenv->repmgr_site_by_eid = __repmgr_site_by_eid; - dbenv->repmgr_site_list = __repmgr_site_list; - dbenv->repmgr_start = __repmgr_start; + dbenv->repmgr_site_list = __repmgr_site_list_pp; + dbenv->repmgr_start = __repmgr_start_pp; dbenv->repmgr_stat = __repmgr_stat_pp; dbenv->repmgr_stat_print = __repmgr_stat_print_pp; dbenv->set_alloc = __env_set_alloc; dbenv->set_app_dispatch = __env_set_app_dispatch; dbenv->set_backup_callbacks = __env_set_backup_callbacks; dbenv->set_backup_config = __env_set_backup_config; + dbenv->set_blob_dir = __env_set_blob_dir; + dbenv->set_blob_threshold = __env_set_blob_threshold; dbenv->set_cache_max = __memp_set_cache_max; dbenv->set_cachesize = __memp_set_cachesize; dbenv->set_create_dir = __env_set_create_dir; @@ -370,10 +383,11 @@ __db_env_init(dbenv) dbenv->thread_id = __os_id; dbenv->thread_id_string = __env_thread_id_string; + dbenv->mutex_failchk_timeout = US_PER_SEC; + env = 
dbenv->env; __os_id(NULL, &env->pid_cache, NULL); - env->db_ref = 0; env->log_verify_wrap = __log_verify_wrap; env->data_len = ENV_DEF_DATA_LEN; TAILQ_INIT(&env->fdlist); @@ -561,6 +575,97 @@ __env_get_memory_init(dbenv, type, countp) } /* + * __env_get_blob_threshold_pp -- + * Get the blob threshold for the environment. Any data item larger + * than the blob threshold is automatically saved as a blob file. + * + * PUBLIC: int __env_get_blob_threshold_pp + * PUBLIC: __P ((DB_ENV *, u_int32_t *)); + */ +int +__env_get_blob_threshold_pp(dbenv, bytes) + DB_ENV *dbenv; + u_int32_t *bytes; +{ + ENV *env; + DB_THREAD_INFO *ip; + int ret; + + env = dbenv->env; + + ENV_ENTER(env, ip); + ret = __env_get_blob_threshold_int(env, bytes); + ENV_LEAVE(env, ip); + + return (ret); +} + +/* + * __env_get_blob_threshold_int -- + * Get the blob threshold for the environment. Any data item larger + * than the blob threshold is automatically saved as a blob file. + * + * PUBLIC: int __env_get_blob_threshold_int + * PUBLIC: __P ((ENV *, u_int32_t *)); + */ +int +__env_get_blob_threshold_int(env, bytes) + ENV *env; + u_int32_t *bytes; +{ + REGENV *renv; + REGINFO *infop; + + if (F_ISSET(env, ENV_OPEN_CALLED)) { + infop = env->reginfo; + renv = infop->primary; + MUTEX_LOCK(env, renv->mtx_regenv); + *bytes = renv->blob_threshold; + MUTEX_UNLOCK(env, renv->mtx_regenv); + } else + *bytes = env->dbenv->blob_threshold; + + return (0); +} + +/* + * __env_set_blob_threshold -- + * Set the default blob threshold for the environment. Any data item larger + * than the blob threshold is automatically saved as a blob file. 
+ * + * PUBLIC: int __env_set_blob_threshold __P((DB_ENV *, u_int32_t, u_int32_t)); + */ +int +__env_set_blob_threshold(dbenv, bytes, flags) + DB_ENV *dbenv; + u_int32_t bytes; + u_int32_t flags; +{ + ENV *env; + REGENV *renv; + REGINFO *infop; + DB_THREAD_INFO *ip; + + env = dbenv->env; + + if (__db_fchk(dbenv->env, "DB_ENV->set_blob_threshold", flags, 0) != 0) + return (EINVAL); + + if (F_ISSET(env, ENV_OPEN_CALLED)) { + infop = env->reginfo; + renv = infop->primary; + ENV_ENTER(env, ip); + MUTEX_LOCK(env, renv->mtx_regenv); + renv->blob_threshold = bytes; + MUTEX_UNLOCK(env, renv->mtx_regenv); + ENV_LEAVE(env, ip); + } else + dbenv->blob_threshold = bytes; + + return (0); +} + +/* * __env_set_memory_init -- * DB_ENV->set_memory_init. * @@ -697,6 +802,43 @@ __env_set_app_dispatch(dbenv, app_dispatch) } /* + * __env_set_blob_dir -- + * API to allow the user to override the default blob file + * root directory. Must be set if blobs are enabled and an + * unnamed environment is created. + * + * PUBLIC: int __env_set_blob_dir __P((DB_ENV *, const char *)); + */ +int +__env_set_blob_dir(dbenv, dir) + DB_ENV *dbenv; + const char *dir; +{ + ENV *env; + + env = dbenv->env; + + ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_blob_dir"); + + if (dbenv->db_blob_dir != NULL) + __os_free(env, dbenv->db_blob_dir); + return (__os_strdup(env, dir, &dbenv->db_blob_dir)); +} + +/* + * __env_get_blob_dir -- + * Get the blob file root directory. + */ +static int +__env_get_blob_dir(dbenv, dirp) + DB_ENV *dbenv; + const char **dirp; +{ + *dirp = dbenv->db_blob_dir; + return (0); +} + +/* * __env_get_encrypt_flags -- * {DB_ENV,DB}->get_encrypt_flags. * @@ -1061,6 +1203,10 @@ __env_set_backup(env, on) return (EINVAL); } + /* + * This code does not need env_rep_enter for the checkpoint because + * it can only happen if there is an active bulk txn existing. 
+ */ if (needs_checkpoint && (ret = __txn_checkpoint(env, 0, 0, 0))) return (ret); return (0); @@ -1244,6 +1390,11 @@ __env_set_data_len(dbenv, data_len) DB_ENV *dbenv; u_int32_t data_len; { + if (data_len == 0) { + __db_errx(dbenv->env, DB_STR("1593", +"Maximum number of bytes to display for each key/data item can not be 0.")); + return (EINVAL); + } dbenv->env->data_len = data_len; return (0); @@ -1720,6 +1871,7 @@ __env_get_verbose(dbenv, which, onoffp) case DB_VERB_DEADLOCK: case DB_VERB_FILEOPS: case DB_VERB_FILEOPS_ALL: + case DB_VERB_MVCC: case DB_VERB_RECOVERY: case DB_VERB_REGISTER: case DB_VERB_REPLICATION: @@ -1758,6 +1910,7 @@ __env_set_verbose(dbenv, which, on) case DB_VERB_DEADLOCK: case DB_VERB_FILEOPS: case DB_VERB_FILEOPS_ALL: + case DB_VERB_MVCC: case DB_VERB_RECOVERY: case DB_VERB_REGISTER: case DB_VERB_REPLICATION: @@ -1888,9 +2041,15 @@ __env_get_timeout(dbenv, timeoutp, flags) int ret; ret = 0; - if (flags == DB_SET_REG_TIMEOUT) { + if (flags == DB_SET_REG_TIMEOUT) *timeoutp = dbenv->envreg_timeout; - } else + else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT) +#ifdef HAVE_FAILCHK_BROADCAST + *timeoutp = dbenv->mutex_failchk_timeout; +#else + ret = USR_ERR(dbenv->env, DB_OPNOTSUP); +#endif + else ret = __lock_get_env_timeout(dbenv, timeoutp, flags); return (ret); } @@ -1912,6 +2071,12 @@ __env_set_timeout(dbenv, timeout, flags) ret = 0; if (flags == DB_SET_REG_TIMEOUT) dbenv->envreg_timeout = timeout; + else if (flags == DB_SET_MUTEX_FAILCHK_TIMEOUT) +#ifdef HAVE_FAILCHK_BROADCAST + dbenv->mutex_failchk_timeout = timeout; +#else + ret = USR_ERR(dbenv->env, DB_OPNOTSUP); +#endif else ret = __lock_set_env_timeout(dbenv, timeout, flags); return (ret); diff --git a/src/env/env_name.c b/src/env/env_name.c index a3a0b371..d0dd5635 100644 --- a/src/env/env_name.c +++ b/src/env/env_name.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -9,6 +9,7 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" static int __db_fullpath __P((ENV *, const char *, const char *, int, int, char **)); @@ -122,7 +123,7 @@ __db_appname(env, appname, file, dirp, namep) { DB_ENV *dbenv; char **ddp; - const char *dir; + const char *blob_dir, *dir; int ret; dbenv = env->dbenv; @@ -141,6 +142,8 @@ __db_appname(env, appname, file, dirp, namep) /* * DB_APP_NONE: * DB_HOME/file + * DB_APP_BLOB: + * DB_HOME/DB_BLOB_DIR/file * DB_APP_DATA: * DB_HOME/DB_DATA_DIR/file * DB_APP_LOG: @@ -151,6 +154,12 @@ __db_appname(env, appname, file, dirp, namep) switch (appname) { case DB_APP_NONE: break; + case DB_APP_BLOB: + if (dbenv != NULL && dbenv->db_blob_dir != NULL) + dir = dbenv->db_blob_dir; + else + dir = BLOB_DEFAULT_DIR; + break; case DB_APP_RECOVER: case DB_APP_DATA: /* @@ -164,6 +173,13 @@ __db_appname(env, appname, file, dirp, namep) /* Second, look in the environment home directory. */ DB_CHECKFILE(file, NULL, 1, 0, namep, dirp); + /* Third, check the blob directory. */ + if (dbenv != NULL && dbenv->db_blob_dir != NULL) + blob_dir = dbenv->db_blob_dir; + else + blob_dir = BLOB_DEFAULT_DIR; + DB_CHECKFILE(file, blob_dir, 1, 0, namep, dirp); + /* * Otherwise, we're going to create. Use the specified * directory unless we're in recovery and it doesn't exist. diff --git a/src/env/env_open.c b/src/env/env_open.c index 7eddca3a..85189369 100644 --- a/src/env/env_open.c +++ b/src/env/env_open.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. 
* * $Id$ */ @@ -107,10 +107,16 @@ __env_open_pp(dbenv, db_home, flags, mode) __db_errx(env, DB_STR("1589", "DB_PRIVATE is not " "supported by 64-bit applications in " "mixed-size-addressing mode")); - return (EINVAL); - } + return (EINVAL); + } #endif + if (LF_ISSET(DB_PRIVATE) && PREFMAS_IS_SET(env)) { + __db_errx(env, DB_STR("1594", "DB_PRIVATE is not " + "supported in Replication Manager preferred master mode")); + return (EINVAL); + } + return (__env_open(dbenv, db_home, flags, mode)); } @@ -129,12 +135,20 @@ __env_open(dbenv, db_home, flags, mode) { DB_THREAD_INFO *ip; ENV *env; - u_int32_t orig_flags; - int register_recovery, ret, t_ret; + u_int32_t orig_flags, retry_flags; + int recovery_failed, register_recovery, ret, t_ret; + char *old_passwd; + size_t old_passwd_len; + u_int32_t old_encrypt_flags; ip = NULL; env = dbenv->env; + recovery_failed = 1; register_recovery = 0; + retry_flags = 0; + old_passwd = NULL; + old_passwd_len = 0; + old_encrypt_flags = 0; /* Initial configuration. 
*/ if ((ret = __env_config(dbenv, db_home, &flags, mode)) != 0) @@ -171,13 +185,27 @@ __env_open(dbenv, db_home, flags, mode) dbenv->is_alive = __envreg_isalive; } - if ((ret = - __envreg_register(env, &register_recovery, flags)) != 0) + /* + * Backup the current key, because it would be consumed by + * __envreg_register below + */ + if (dbenv->passwd != NULL) { + if ((ret = __os_strdup(env, dbenv->passwd, &old_passwd)) != 0) + goto err; + old_passwd_len = dbenv->passwd_len; + (void)__env_get_encrypt_flags(dbenv, &old_encrypt_flags); + } + + F_SET(dbenv, DB_ENV_NOPANIC); + ret = __envreg_register(env, &register_recovery, flags); + dbenv->flags = orig_flags; + if (ret != 0) goto err; if (register_recovery) { if (!LF_ISSET(DB_RECOVER)) { __db_errx(env, DB_STR("1567", "The DB_RECOVER flag was not specified, and recovery is needed")); + recovery_failed = 0; ret = DB_RUNRECOVERY; goto err; } @@ -197,16 +225,27 @@ __env_open(dbenv, db_home, flags, mode) * want to remove files left over for any reason, from any session. */ retry: if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) + if ( #ifdef HAVE_REPLICATION - if ((ret = __rep_reset_init(env)) != 0 || - (ret = __env_remove_env(env)) != 0 || -#else - if ((ret = __env_remove_env(env)) != 0 || + (ret = __rep_reset_init(env)) != 0 || #endif - (ret = __env_refresh(dbenv, orig_flags, 0)) != 0) + (ret = __env_remove_env(env)) != 0 || + (ret = __env_refresh(dbenv, + orig_flags | retry_flags, 0)) != 0) goto err; - if ((ret = __env_attach_regions(dbenv, flags, orig_flags, 1)) != 0) + /* Restore the database key. 
*/ + if (LF_ISSET(DB_REGISTER) && old_passwd != NULL) { + ret = __env_set_encrypt(dbenv, old_passwd, old_encrypt_flags); + memset(old_passwd, 0xff, old_passwd_len - 1); + __os_free(env, old_passwd); + if (ret != 0) + goto err; + } + + DB_ASSERT(env, ret == 0); + if ((ret = __env_attach_regions(dbenv, + flags, orig_flags | retry_flags, 1)) != 0) goto err; /* @@ -216,8 +255,18 @@ retry: if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) */ if (LF_ISSET(DB_FAILCHK) && !register_recovery) { ENV_ENTER(env, ip); - if ((ret = __env_failchk_int(dbenv)) != 0) + /* + * Set the thread state so that any waiting for a potentially + * dead thread will call is_alive() in order to avoid hanging. + */ + FAILCHK_THREAD(env, ip); + ret = __env_failchk_int(dbenv); + if (ret != 0) { + __db_err(env, ret, + DB_STR("1595", + "failchk crash after clean registry")); goto err; + } ENV_LEAVE(env, ip); } @@ -230,12 +279,12 @@ err: if (ret != 0) * processes can now proceed. * * If recovery failed, unregister now and let another process - * clean up. + * clean up and run recovery. */ if (ret == 0 && (t_ret = __envreg_xunlock(env)) != 0) ret = t_ret; if (ret != 0) - (void)__envreg_unregister(env, 1); + (void)__envreg_unregister(env, recovery_failed); } /* @@ -247,7 +296,11 @@ err: if (ret != 0) */ if (ret == DB_RUNRECOVERY && !register_recovery && !LF_ISSET(DB_RECOVER) && LF_ISSET(DB_REGISTER)) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, DB_STR("1596", + "env_open DB_REGISTER w/o RECOVER panic: trying w/recovery")); LF_SET(DB_RECOVER); + retry_flags = DB_ENV_NOPANIC; goto retry; } @@ -304,6 +357,9 @@ __env_open_arg(dbenv, flags) "replication requires transaction support")); return (EINVAL); } + if ((ret = + __log_set_config_int(dbenv, DB_LOG_BLOB, 1, 1)) != 0) + return (ret); } if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) { if ((ret = __db_fcchk(env, @@ -349,30 +405,6 @@ __env_open_arg(dbenv, flags) } #endif -#ifdef HAVE_MUTEX_FCNTL - /* - * !!! 
- * We need a file descriptor for fcntl(2) locking. We use the file - * handle from the REGENV file for this purpose. - * - * Since we may be using shared memory regions, e.g., shmget(2), and - * not a mapped-in regular file, the backing file may be only a few - * bytes in length. So, this depends on the ability to call fcntl to - * lock file offsets much larger than the actual physical file. I - * think that's safe -- besides, very few systems actually need this - * kind of support, SunOS is the only one still in wide use of which - * I'm aware. - * - * The error case is if an application lacks spinlocks and wants to be - * threaded. That doesn't work because fcntl will lock the underlying - * process, including all its threads. - */ - if (F_ISSET(env, ENV_THREAD)) { - __db_errx(env, DB_STR("1578", - "architecture lacks fast mutexes: applications cannot be threaded")); - return (EINVAL); - } -#endif return (ret); } @@ -506,7 +538,7 @@ __env_close_pp(dbenv, flags) { DB_THREAD_INFO *ip; ENV *env; - int rep_check, ret, t_ret; + int ret, t_ret; u_int32_t close_flags, flags_orig; env = dbenv->env; @@ -517,65 +549,75 @@ __env_close_pp(dbenv, flags) * Validate arguments, but as a DB_ENV handle destructor, we can't * fail. */ - if (flags != 0 && flags != DB_FORCESYNC && - (t_ret = __db_ferr(env, "DB_ENV->close", 0)) != 0 && ret == 0) - ret = t_ret; +#undef OKFLAGS +#define OKFLAGS (DB_FORCESYNC | DB_FORCESYNCENV) + + ret = __db_fchk(env, "DB_ENV->close", flags, OKFLAGS); #define DBENV_FORCESYNC 0x00000001 #define DBENV_CLOSE_REPCHECK 0x00000010 - if (flags == DB_FORCESYNC) + if (LF_ISSET(DB_FORCESYNC)) close_flags |= DBENV_FORCESYNC; + if (LF_ISSET(DB_FORCESYNCENV)) + F_SET(env, ENV_FORCESYNCENV); + + /* + * Call __env_close() to clean up resources even though the open + * didn't fully succeed. + * */ + if (!F_ISSET(env, ENV_OPEN_CALLED)) + goto do_close; /* * If the environment has panic'd, all we do is try and discard * the important resources. 
*/ if (PANIC_ISSET(env)) { + /* + * Temporarily set no panic so we do not trigger the + * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwrite thus + * allowing the unregister to happen correctly. + */ + flags_orig = dbenv->flags; + F_SET(dbenv, DB_ENV_NOPANIC); + ENV_ENTER(env, ip); /* clean up from registry file */ - if (dbenv->registry != NULL) { - /* - * Temporarily set no panic so we do not trigger the - * LAST_PANIC_CHECK_BEFORE_IO check in __os_physwr - * thus allowing the unregister to happen correctly. - */ - flags_orig = F_ISSET(dbenv, DB_ENV_NOPANIC); - F_SET(dbenv, DB_ENV_NOPANIC); + if (dbenv->registry != NULL) (void)__envreg_unregister(env, 0); - dbenv->registry = NULL; - if (!flags_orig) - F_CLR(dbenv, DB_ENV_NOPANIC); - } /* Close all underlying threads and sockets. */ - if (IS_ENV_REPLICATED(env)) - (void)__repmgr_close(env); + (void)__repmgr_close(env); /* Close all underlying file handles. */ (void)__file_handle_cleanup(env); + ENV_LEAVE(env, ip); + + dbenv->flags = flags_orig; + (void)__env_region_cleanup(env); - PANIC_CHECK(env); + return (__env_panic_msg(env)); } ENV_ENTER(env, ip); - rep_check = IS_ENV_REPLICATED(env) ? 1 : 0; - if (rep_check) { #ifdef HAVE_REPLICATION_THREADS - /* - * Shut down Replication Manager threads first of all. This - * must be done before __env_rep_enter to avoid a deadlock that - * could occur if repmgr's background threads try to do a rep - * operation that needs __rep_lockout. - */ - if ((t_ret = __repmgr_close(env)) != 0 && ret == 0) - ret = t_ret; + /* + * Shut down Replication Manager threads first of all. This + * must be done before __env_rep_enter to avoid a deadlock that + * could occur if repmgr's background threads try to do a rep + * operation that needs __rep_lockout. 
+ */ + if ((t_ret = __repmgr_close(env)) != 0 && ret == 0) + ret = t_ret; #endif + if (IS_ENV_REPLICATED(env)) { if ((t_ret = __env_rep_enter(env, 0)) != 0 && ret == 0) ret = t_ret; + if (ret == 0) + close_flags |= DBENV_CLOSE_REPCHECK; } - if (rep_check) - close_flags |= DBENV_CLOSE_REPCHECK; +do_close: if ((t_ret = __env_close(dbenv, close_flags)) != 0 && ret == 0) ret = t_ret; @@ -640,8 +682,11 @@ __env_close(dbenv, flags) t_ret = dbp->alt_close(dbp, close_flags); else t_ret = __db_close(dbp, NULL, close_flags); - if (t_ret != 0 && ret == 0) - ret = t_ret; + if (t_ret != 0) { + if (ret == 0) + ret = t_ret; + break; + } } /* @@ -661,10 +706,8 @@ __env_close(dbenv, flags) #endif /* If we're registered, clean up. */ - if (dbenv->registry != NULL) { + if (dbenv->registry != NULL) (void)__envreg_unregister(env, 0); - dbenv->registry = NULL; - } /* Check we've closed all underlying file handles. */ if ((t_ret = __file_handle_cleanup(env)) != 0 && ret == 0) @@ -680,6 +723,9 @@ __env_close(dbenv, flags) if (dbenv->db_md_dir != NULL) __os_free(env, dbenv->db_md_dir); dbenv->db_md_dir = NULL; + if (dbenv->db_blob_dir != NULL) + __os_free(env, dbenv->db_blob_dir); + dbenv->db_blob_dir = NULL; if (dbenv->db_data_dir != NULL) { for (p = dbenv->db_data_dir; *p != NULL; ++p) __os_free(env, *p); @@ -761,9 +807,7 @@ __env_refresh(dbenv, orig_flags, rep_check) ret = t_ret; } - /* Discard the DB_ENV, ENV handle mutexes. */ - if ((t_ret = __mutex_free(env, &dbenv->mtx_db_env)) != 0 && ret == 0) - ret = t_ret; + /* Discard the ENV handle mutex. 
*/ if ((t_ret = __mutex_free(env, &env->mtx_env)) != 0 && ret == 0) ret = t_ret; @@ -936,17 +980,38 @@ __file_handle_cleanup(env) ENV *env; { DB_FH *fhp; + DB_MPOOL *dbmp; + u_int i; - if (TAILQ_FIRST(&env->fdlist) == NULL) + if (TAILQ_EMPTY(&env->fdlist)) return (0); - __db_errx(env, DB_STR("1581", - "File handles still open at environment close")); + __db_errx(env, + DB_STR("1581", "File handles still open at environment close")); while ((fhp = TAILQ_FIRST(&env->fdlist)) != NULL) { - __db_errx(env, DB_STR_A("1582", "Open file handle: %s", "%s"), - fhp->name); - (void)__os_closehandle(env, fhp); + __db_errx(env, + DB_STR_A("1582", "Open file handle: %s", "%s"), fhp->name); + if (__os_closehandle(env, fhp) != 0) + break; } + if (env->lockfhp != NULL) + env->lockfhp = NULL; + /* Invalidate saved pointers to the regions' files: all are closed. */ + if (env->reginfo != NULL) + env->reginfo->fhp = NULL; + if (env->lg_handle != NULL) + env->lg_handle->reginfo.fhp = NULL; + if (env->lk_handle != NULL) + env->lk_handle->reginfo.fhp = NULL; +#ifdef HAVE_MUTEX_SUPPORT + if (env->mutex_handle != NULL) + env->mutex_handle->reginfo.fhp = NULL; +#endif + if (env->tx_handle != NULL) + env->tx_handle->reginfo.fhp = NULL; + if ((dbmp = env->mp_handle) != NULL && dbmp->reginfo != NULL) + for (i = 0; i < env->dbenv->mp_ncache; ++i) + dbmp->reginfo[i].fhp = NULL; return (EINVAL); } @@ -1109,11 +1174,9 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok) goto err; /* - * Initialize the handle mutexes. + * Initialize the handle mutex. */ if ((ret = __mutex_alloc(env, - MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_db_env)) != 0 || - (ret = __mutex_alloc(env, MTX_ENV_HANDLE, DB_MUTEX_PROCESS_ONLY, &env->mtx_env)) != 0) goto err; @@ -1125,8 +1188,15 @@ __env_attach_regions(dbenv, flags, orig_flags, retry_ok) goto err; rep_check = IS_ENV_REPLICATED(env) ? 
1 : 0; - if (rep_check && (ret = __env_rep_enter(env, 0)) != 0) + if (rep_check && (ret = __env_rep_enter(env, 0)) != 0) { + /* + * If we get an error we didn't increment handle_cnt, + * so we don't want to decrement it later. Turn off + * rep_check here. + */ + rep_check = 0; goto err; + } if (LF_ISSET(DB_INIT_MPOOL)) { if ((ret = __memp_open(env, create_ok)) != 0) diff --git a/src/env/env_recover.c b/src/env/env_recover.c index 9636554a..fb7ddee7 100644 --- a/src/env/env_recover.c +++ b/src/env/env_recover.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -18,17 +18,15 @@ #include "dbinc/qam.h" #include "dbinc/txn.h" -#ifndef lint -static const char copyright[] = - "Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.\n"; -#endif - static int __db_log_corrupt __P((ENV *, DB_LSN *)); static int __env_init_rec_42 __P((ENV *)); static int __env_init_rec_43 __P((ENV *)); static int __env_init_rec_46 __P((ENV *)); static int __env_init_rec_47 __P((ENV *)); static int __env_init_rec_48 __P((ENV *)); +static int __env_init_rec_53 __P((ENV *)); +static int __env_init_rec_60 __P((ENV *)); +static int __env_init_rec_60p1 __P((ENV *)); static int __log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *)); static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); @@ -632,6 +630,12 @@ err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) dbenv->tx_timestamp = 0; + /* + * Failure means that the env has panicked. Disable locking so that the + * env can close without its mutexes calls causing additional panics. 
+ */ + if (ret != 0) + F_SET(env->dbenv, DB_ENV_NOLOCKING); F_CLR(env->lg_handle, DBLOG_RECOVER); F_CLR(region, TXN_IN_RECOVERY); @@ -690,7 +694,8 @@ __lsn_diff(low, high, current, max, is_forward) * is trying to sync up with a master whose max LSN is less than this * client's max lsn; we want to roll back everything after that. * - * Find the latest checkpoint whose ckp_lsn is less than the max lsn. + * Find the latest checkpoint less than or equal to max lsn and + * return the ckp_lsn from that checkpoint. */ static int __log_backup(env, logc, max_lsn, start_lsn) @@ -713,10 +718,11 @@ __log_backup(env, logc, max_lsn, start_lsn) return (ret); /* * Follow checkpoints through the log until - * we find one with a ckp_lsn less than - * or equal max_lsn. + * we find one less than or equal max_lsn. + * Then return the ckp_lsn from that checkpoint as it + * is our earliest outstanding txn needed. */ - if (LOG_COMPARE(&ckp_args->ckp_lsn, max_lsn) <= 0) { + if (LOG_COMPARE(&lsn, max_lsn) <= 0) { *start_lsn = ckp_args->ckp_lsn; break; } @@ -727,7 +733,7 @@ __log_backup(env, logc, max_lsn, start_lsn) * done. Break with DB_NOTFOUND. */ if (IS_ZERO_LSN(lsn)) { - ret = DB_NOTFOUND; + ret = USR_ERR(env, DB_NOTFOUND); break; } __os_free(env, ckp_args); @@ -880,6 +886,9 @@ __db_log_corrupt(env, lsnp) /* * __env_init_rec -- * + * Install recover functions in the environment. Whenever this is updated, + * corresponding changes are needed by db_printlog's env_init_print(). + * * PUBLIC: int __env_init_rec __P((ENV *, u_int32_t)); */ int @@ -924,6 +933,29 @@ __env_init_rec(env, version) * oldest revision that applies must be used. Therefore we override * the recovery functions in reverse log version order. */ + if (version == DB_LOGVERSION) + goto done; + + /* DB_LOGVERSION_61 add the blob file id to the dbreg logs. 
*/ + if (version > DB_LOGVERSION_60p1) + goto done; + if ((ret = __env_init_rec_60p1(env)) != 0) + goto err; + + /* + * DB_LOGVERSION_60p1 changed the two u_int32_t offset fields in the + * log for fop_write_file into a single int64. + */ + if (version > DB_LOGVERSION_60) + goto done; + if ((ret = __env_init_rec_60(env)) != 0) + goto err; + + /* DB_LOGVERSION_53 changed the heap addrem log record. */ + if (version > DB_LOGVERSION_53) + goto done; + if ((ret = __env_init_rec_53(env)) != 0) + goto err; /* * DB_LOGVERSION_53 is a strict superset of DB_LOGVERSION_50. * So, only check > DB_LOGVERSION_48p2. If/When log records are @@ -931,6 +963,8 @@ __env_init_rec(env, version) */ if (version > DB_LOGVERSION_48p2) goto done; + if (version >= DB_LOGVERSION_50) + goto done; if ((ret = __env_init_rec_48(env)) != 0) goto err; /* @@ -1091,3 +1125,77 @@ __env_init_rec_48(env) err: return (ret); } + +static int +__env_init_rec_53(env) + ENV *env; +{ + int ret; + +#ifdef HAVE_HEAP + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __heap_addrem_50_recover, DB___heap_addrem_50)) != 0) + goto err; +#else + COMPQUIET(env, NULL); + COMPQUIET(ret, 0); + goto err; +#endif +err: + return (ret); +} + +static int +__env_init_rec_60(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_create_60_recover, DB___fop_create_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_remove_60_recover, DB___fop_remove_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_rename_60_recover, DB___fop_rename_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_rename_noundo_60_recover, DB___fop_rename_noundo_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_file_remove_60_recover, DB___fop_file_remove_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + 
__fop_write_60_recover, DB___fop_write_60)) != 0) + goto err; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __fop_write_file_60_recover, DB___fop_write_file_60)) != 0) + goto err; +err: + return (ret); +} + +static int +__env_init_rec_60p1(env) + ENV *env; +{ + int ret; + + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __dbreg_register_42_recover, DB___dbreg_register_42)) != 0) + goto err; +#ifdef HAVE_HEAP + if ((ret = __db_add_recovery_int(env, &env->recover_dtab, + __heap_addrem_60_recover, DB___heap_addrem_60)) != 0) + goto err; +#endif +err: + return (ret); +} diff --git a/src/env/env_region.c b/src/env/env_region.c index 113bea21..cf7085b7 100644 --- a/src/env/env_region.c +++ b/src/env/env_region.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -90,8 +90,11 @@ loop: renv = NULL; * it's actually a creation or not, and we'll have to fall-back to a * join if it's not a create. */ - if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL) + if (F_ISSET(env, ENV_PRIVATE) || DB_GLOBAL(j_region_map) != NULL) { + DB_DEBUG_MSG(env, "env_attach: creating %s", + F_ISSET(env, ENV_PRIVATE) ? "private" : "user map func"); goto creation; + } /* * Try to create the file, if we have the authority. We have to ensure @@ -179,14 +182,15 @@ loop: renv = NULL; * something in the region file other than meta-data and that * shouldn't happen. 
*/ - if (size < sizeof(ref)) + if (size < sizeof(ref)) { + DB_DEBUG_MSG(env, "region size %d is too small", (int)size); goto retry; - else { + } else { if (size == sizeof(ref)) F_SET(env, ENV_SYSTEM_MEM); else if (F_ISSET(env, ENV_SYSTEM_MEM)) { - ret = EINVAL; + ret = USR_ERR(env, EINVAL); __db_err(env, ret, DB_STR_A("1535", "%s: existing environment not created in system memory", "%s"), infop->name); @@ -197,6 +201,7 @@ loop: renv = NULL; nrw < (size_t)sizeof(rbuf) || (ret = __os_seek(env, env->lockfhp, 0, 0, rbuf.region_off)) != 0) { + ret = USR_ERR(env, ret); __db_err(env, ret, DB_STR_A("1536", "%s: unable to read region info", "%s"), infop->name); @@ -207,7 +212,8 @@ loop: renv = NULL; if ((ret = __os_read(env, env->lockfhp, &ref, sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) { if (ret == 0) - ret = EIO; + ret = USR_ERR(env, EIO); + (void)USR_ERR(env, ret); __db_err(env, ret, DB_STR_A("1537", "%s: unable to read system-memory information", "%s"), infop->name); @@ -218,18 +224,16 @@ loop: renv = NULL; segid = ref.segid; } -#ifndef HAVE_MUTEX_FCNTL /* - * If we're not doing fcntl locking, we can close the file handle. We - * no longer need it and the less contact between the buffer cache and - * the VM, the better. + * We no longer need the file handle; the less contact between the + * buffer cache and the VM, the better. */ (void)__os_closehandle(env, env->lockfhp); env->lockfhp = NULL; -#endif /* Call the region join routine to acquire the region. 
*/ memset(&tregion, 0, sizeof(tregion)); + tregion.type = REGION_TYPE_ENV; tregion.size = (roff_t)size; tregion.max = (roff_t)max; tregion.segid = segid; @@ -257,15 +261,15 @@ user_map_functions: "Program version %d.%d doesn't match environment version %d.%d", "%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR, renv->majver, renv->minver); - ret = DB_VERSION_MISMATCH; + ret = USR_ERR(env, DB_VERSION_MISMATCH); } else - ret = EINVAL; + ret = USR_ERR(env, EINVAL); goto err; } if (renv->signature != signature) { __db_errx(env, DB_STR("1539", "Build signature doesn't match environment")); - ret = DB_VERSION_MISMATCH; + ret = USR_ERR(env, DB_VERSION_MISMATCH); goto err; } @@ -287,8 +291,16 @@ user_map_functions: ret = __env_panic_msg(env); goto err; } - if (renv->magic != DB_REGION_MAGIC) + if (renv->magic != DB_REGION_MAGIC) { + DB_DEBUG_MSG(env, + "attach sees bad region magic 0x%lx", (u_long)renv->magic); goto retry; + } + + if (dbenv->blob_threshold != 0 && + renv->blob_threshold != dbenv->blob_threshold) + __db_msg(env, DB_STR("1591", +"Warning: Ignoring blob_threshold size when joining environment")); /* * Get a reference to the underlying REGION information for this @@ -329,7 +341,7 @@ user_map_functions: if (*init_flagsp != 0) { __db_errx(env, DB_STR("1540", "configured environment flags incompatible with existing environment")); - ret = EINVAL; + ret = USR_ERR(env, EINVAL); goto err; } *init_flagsp = renv->init_flags; @@ -437,6 +449,8 @@ creation: renv->minver = (u_int32_t)minver; renv->patchver = (u_int32_t)patchver; renv->signature = signature; + renv->failure_panic = 0; + renv->failure_symptom[0] = '\0'; (void)time(&renv->timestamp); __os_unique_id(env, &renv->envid); @@ -447,6 +461,8 @@ creation: */ renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp; + renv->blob_threshold = dbenv->blob_threshold; + /* * Set up the region array. 
We use an array rather than a linked list * as we have to traverse this list after failure in some cases, and @@ -513,17 +529,14 @@ find_err: __db_errx(env, DB_STR_A("1544", } } -#ifndef HAVE_MUTEX_FCNTL /* - * If we're not doing fcntl locking, we can close the file handle. We - * no longer need it and the less contact between the buffer cache and - * the VM, the better. + * We no longer need the file handle and the less contact between the + * buffer cache and the VM, the better. */ if (env->lockfhp != NULL) { (void)__os_closehandle(env, env->lockfhp); env->lockfhp = NULL; } -#endif /* Everything looks good, we're done. */ env->reginfo = infop; @@ -550,7 +563,7 @@ retry: /* Close any open file handle. */ (void)__env_sys_detach(env, infop, F_ISSET(infop, REGION_CREATE)); - if (rp != NULL && F_ISSET(env, DB_PRIVATE)) + if (rp != NULL && F_ISSET(env, ENV_PRIVATE)) __env_alloc_free(infop, rp); } @@ -674,8 +687,23 @@ __env_panic_set(env, on) ENV *env; int on; { - if (env != NULL && env->reginfo != NULL) - ((REGENV *)env->reginfo->primary)->panic = on ? 1 : 0; + REGENV *renv; + + if (env != NULL && env->reginfo != NULL) { + /* + * Remember it in the process' env as well, so that the + * panic-ness is still known on exit from the final close. + */ + renv = env->reginfo->primary; + if (on) { + F_SET(env, ENV_REMEMBER_PANIC); + if (F_ISSET(env->dbenv, DB_ENV_FAILCHK)) + renv->failure_panic = 1; + } + else + F_CLR(env, ENV_REMEMBER_PANIC); + renv->panic = on ? 1 : 0; + } } /* @@ -775,6 +803,31 @@ __env_ref_get(dbenv, countp) } /* + * __env_region_cleanup -- + * Detach from any regions, e.g., when closing after a panic. 
+ * + * PUBLIC: int __env_region_cleanup __P((ENV *)); + */ +int +__env_region_cleanup(env) + ENV *env; +{ + if (env->reginfo != NULL) { +#ifdef HAVE_MUTEX_SUPPORT + (void)__lock_region_detach(env, env->lk_handle); + (void)__mutex_region_detach(env, env->mutex_handle); +#endif + (void)__log_region_detach(env, env->lg_handle); + (void)__memp_region_detach(env, env->mp_handle); + (void)__txn_region_detach(env, env->tx_handle); + (void)__env_detach(env, 0); + /* Remember the panic state after detaching. */ + F_SET(env, ENV_REMEMBER_PANIC); + } + return (0); +} + +/* * __env_detach -- * Detach from the environment. * @@ -796,9 +849,7 @@ __env_detach(env, destroy) /* Close the locking file handle. */ if (env->lockfhp != NULL) { - if ((t_ret = - __os_closehandle(env, env->lockfhp)) != 0 && ret == 0) - ret = t_ret; + ret = __os_closehandle(env, env->lockfhp); env->lockfhp = NULL; } @@ -1249,13 +1300,13 @@ __env_sys_attach(env, infop, rp) __db_errx(env, DB_STR_A("1548", "region size %lu is too large; maximum is %lu", "%lu %lu"), (u_long)rp->size, (u_long)DB_REGIONSIZE_MAX); - return (EINVAL); + return (USR_ERR(env, EINVAL)); } if (rp->max > DB_REGIONSIZE_MAX) { __db_errx(env, DB_STR_A("1549", "region max %lu is too large; maximum is %lu", "%lu %lu"), (u_long)rp->max, (u_long)DB_REGIONSIZE_MAX); - return (EINVAL); + return (USR_ERR(env, EINVAL)); } #endif @@ -1281,7 +1332,7 @@ __env_sys_attach(env, infop, rp) "architecture does not support locks inside process-local (malloc) memory")); __db_errx(env, DB_STR("1551", "application may not specify both DB_PRIVATE and DB_THREAD")); - return (EINVAL); + return (USR_ERR(env, EINVAL)); } #endif if ((ret = __os_malloc( @@ -1310,7 +1361,7 @@ __env_sys_attach(env, infop, rp) "region memory was not correctly aligned")); (void)__env_sys_detach(env, infop, F_ISSET(infop, REGION_CREATE)); - return (EINVAL); + return (USR_ERR(env, EINVAL)); } return (0); @@ -1402,7 +1453,7 @@ __env_des_get(env, env_infop, infop, rpp) * the region, fail. 
The caller generates any error message. */ if (!F_ISSET(infop, REGION_CREATE_OK)) - return (ENOENT); + return (USR_ERR(env, ENOENT)); /* * If we didn't find a region and don't have room to create the region @@ -1411,7 +1462,7 @@ __env_des_get(env, env_infop, infop, rpp) if (empty_slot == NULL) { __db_errx(env, DB_STR("1553", "no room remaining for additional REGIONs")); - return (ENOENT); + return (USR_ERR(env, ENOENT)); } /* diff --git a/src/env/env_register.c b/src/env/env_register.c index 7475444d..731ddd1f 100644 --- a/src/env/env_register.c +++ b/src/env/env_register.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2004, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -30,6 +30,7 @@ static int __envreg_add __P((ENV *, int *, u_int32_t)); static int __envreg_pid_compare __P((const void *, const void *)); static int __envreg_create_active_pid __P((ENV *, char *)); +static int __envreg_add_active_pid __P((ENV*, char *)); /* * Support for portable, multi-process database environment locking, based on @@ -137,7 +138,7 @@ __envreg_register(env, need_recoveryp, flags) if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) __db_msg(env, DB_STR_A("1524", - "%lu: register environment", "%lu"), (u_long)pid); + "%lu: register environment", "%lu"), (u_long)pid); /* Build the path name and open the registry file. */ if ((ret = __db_appname(env, @@ -176,7 +177,6 @@ __envreg_register(env, need_recoveryp, flags) /* Register this process. */ if ((ret = __envreg_add(env, need_recoveryp, flags)) != 0) goto err; - /* * Release our exclusive lock if we don't need to run recovery. If * we need to run recovery, ENV->open will call back into register @@ -186,8 +186,7 @@ __envreg_register(env, need_recoveryp, flags) goto err; if (0) { -err: *need_recoveryp = 0; - +err: /* * !!! * Closing the file handle must release all of our locks. 
@@ -196,7 +195,6 @@ err: *need_recoveryp = 0; (void)__os_closehandle(env, dbenv->registry); dbenv->registry = NULL; } - if (pp != NULL) __os_free(env, pp); @@ -222,11 +220,11 @@ __envreg_add(env, need_recoveryp, flags) size_t nr, nw; u_int lcnt; u_int32_t bytes, mbytes, orig_flags; - int need_recovery, ret, t_ret; + int need_failchk, ret, t_ret; char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10]; dbenv = env->dbenv; - need_recovery = 0; + need_failchk = t_ret = 0; COMPQUIET(dead, 0); COMPQUIET(p, NULL); ip = NULL; @@ -269,7 +267,7 @@ kill_all: /* * registering. */ if (nr != PID_LEN) { - need_recovery = 1; + need_failchk = 1; break; } @@ -299,7 +297,7 @@ kill_all: /* } #if DB_ENVREG_KILL_ALL - if (need_recovery) { + if (need_failchk) { pid = (pid_t)strtoul(buf, NULL, 10); (void)kill(pid, SIGKILL); @@ -318,7 +316,7 @@ kill_all: /* __db_msg(env, DB_STR_A("1530", "%02u: %s: FAILED", "%02u %s"), lcnt, p); - need_recovery = 1; + need_failchk = 1; dead = pos; #if DB_ENVREG_KILL_ALL goto kill_all; @@ -331,16 +329,27 @@ kill_all: /* "%02u: %s: LOCKED", "%02u %s"), lcnt, p); } + /* Check for a panic; if so there's no need to call failchk. */ + if (__env_attach(env, NULL, 0, 0) != 0) + goto sig_proc; + infop = env->reginfo; + renv = infop->primary; + *need_recoveryp = renv->panic != 0; + (void)__env_detach(env, 0); + if (*need_recoveryp) + return (0); + /* - * If we have to perform recovery... + * If we have to perform failchk... * * Mark all slots empty. Registry ignores empty slots we can't lock, * so it doesn't matter if any of the processes are in the middle of * exiting Berkeley DB -- they'll discard their lock when they exit. 
*/ - if (need_recovery) { + if (need_failchk) { if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) - __db_msg(env, "%lu: recovery required", (u_long)pid); + __db_msg(env, + "%lu: failchk recovery required", (u_long)pid); if (LF_ISSET(DB_FAILCHK) || LF_ISSET(DB_FAILCHK_ISALIVE)) { if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) @@ -352,13 +361,14 @@ kill_all: /* env, pid_buf)) != 0) goto sig_proc; - /* The environment will already exist, so we do not + /* + * The environment will already exist, so we do not * want DB_CREATE set, nor do we want any recovery at * this point. No need to put values back as flags is * passed in by value. Save original dbenv flags in * case we need to recover/remove existing environment. * Set DB_ENV_FAILCHK before attach to help ensure we - * dont block on a mutex held by the dead process. + * don't block on a mutex held by the dead process. */ LF_CLR(DB_CREATE | DB_RECOVER | DB_RECOVER_FATAL); orig_flags = dbenv->flags; @@ -367,44 +377,53 @@ kill_all: /* if ((ret = __env_attach_regions( dbenv, flags, orig_flags, 0)) != 0) goto sig_proc; - if ((t_ret = - __env_set_state(env, &ip, THREAD_FAILCHK)) != 0 && - ret == 0) + if ((t_ret = __env_set_state(env, + &ip, THREAD_FAILCHK)) != 0 && ret == 0) ret = t_ret; - if ((t_ret = - __env_failchk_int(dbenv)) != 0 && ret == 0) + if (ret == 0 && (t_ret = __env_failchk_int(dbenv)) != 0) ret = t_ret; + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, + "%lu: failchk returned %d, ret is %d", + (u_long)pid, t_ret, ret); /* Free active pid array if used. */ if (LF_ISSET(DB_FAILCHK_ISALIVE)) { - DB_GLOBAL(num_active_pids) = 0; - DB_GLOBAL(size_active_pids) = 0; - __os_free( env, DB_GLOBAL(active_pids)); + env->num_active_pids = 0; + env->size_active_pids = 0; + __os_free(env, env->active_pids); + env->active_pids = NULL; } /* Detach from environment and deregister thread. 
*/ - if ((t_ret = - __env_refresh(dbenv, orig_flags, 0)) != 0 && - ret == 0) + if ((t_ret = __env_refresh(dbenv, + orig_flags, 0)) != 0 && ret == 0) ret = t_ret; + F_CLR(env, ENV_OPEN_CALLED); + if (ret == 0) { if ((ret = __os_seek(env, dbenv->registry, - 0, 0,(u_int32_t)dead)) != 0 || + 0, 0, (u_int32_t)dead)) != 0 || (ret = __os_write(env, dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0) return (ret); - need_recovery = 0; + need_failchk = 0; goto add; } } /* If we can't attach, then we cannot set DB_REGISTER panic. */ -sig_proc: if (__env_attach(env, NULL, 0, 0) == 0) { +sig_proc: + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(env, "%lu: sig_proc attaching errs %s/ret %s", + (u_long)pid, db_strerror(t_ret), db_strerror(ret)); + if (__env_attach(env, NULL, 0, 0) == 0) { infop = env->reginfo; renv = infop->primary; - /* Indicate DB_REGSITER panic. Also, set environment - * panic as this is the panic trigger mechanism in - * the code that everything looks for. + /* + * Indicate DB_REGISTER panic. Also, set (or re-set) + * environment panic as this is the panic trigger + * mechanism in the code that everything looks for. */ renv->reg_panic = 1; renv->panic = 1; @@ -484,7 +503,7 @@ add: if ((ret = __os_seek(env, dbenv->registry, 0, 0, 0)) != 0) } } - if (need_recovery) + if (need_failchk) *need_recoveryp = 1; return (ret); @@ -543,8 +562,9 @@ __envreg_unregister(env, recovery_failed) * also releasing our slot lock, we could race. That can't happen, I * don't think. 
*/ -err: if ((t_ret = - __os_closehandle(env, dbenv->registry)) != 0 && ret == 0) +err: + if (dbenv->registry != NULL && + (t_ret = __os_closehandle(env, dbenv->registry)) != 0 && ret == 0) ret = t_ret; dbenv->registry = NULL; @@ -610,6 +630,10 @@ __envreg_isalive(dbenv, pid, tid, flags ) db_threadid_t tid; u_int32_t flags; { + ENV *env; + + env = dbenv->env; + /* in this case we really do not care about tid, simply for lint */ DB_THREADID_INIT(tid); @@ -617,15 +641,14 @@ __envreg_isalive(dbenv, pid, tid, flags ) if (!((flags == 0) || (flags == DB_MUTEX_PROCESS_ONLY))) return (EINVAL); - if (DB_GLOBAL(active_pids) == NULL || - DB_GLOBAL(num_active_pids) == 0 || dbenv == NULL) + if (env->active_pids == NULL || env->num_active_pids == 0) return (0); /* * bsearch returns a pointer to an entry in active_pids if a match * is found on pid, else no match found it returns NULL. This * routine will return a 1 if a match is found, else a 0. */ - if (bsearch(&pid, DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids), + if (bsearch(&pid, env->active_pids, env->num_active_pids, sizeof(pid_t), __envreg_pid_compare)) return 1; @@ -635,7 +658,8 @@ __envreg_isalive(dbenv, pid, tid, flags ) /* * __envreg_create_active_pid -- * Create array of pids, if need more room in array then double size. - * Only add active pids from DB_REGISTER file into array. + * Only add active pids from DB_REGISTER file into array. The given + * active my_pid is also added into array. 
*/ static int __envreg_create_active_pid(env, my_pid) @@ -646,8 +670,7 @@ __envreg_create_active_pid(env, my_pid) char buf[PID_LEN + 10]; int ret; off_t pos; - pid_t pid, *tmparray; - size_t tmpsize, nr; + size_t nr; u_int lcnt; dbenv = env->dbenv; @@ -655,6 +678,15 @@ __envreg_create_active_pid(env, my_pid) ret = 0; /* + * The process getting here has not been added to the DB_REGISTER + * file yet, so include it as the first item in array + */ + if (env->num_active_pids == 0) { + if ((ret = __envreg_add_active_pid(env, my_pid)) != 0) + return (ret); + } + + /* * Walk through DB_REGISTER file, we grab pid entries that are locked * as those represent processes that are still alive. Ignore empty * slots, or those that are unlocked. @@ -678,53 +710,50 @@ __envreg_create_active_pid(env, my_pid) if ((ret = REGISTRY_UNLOCK(env, pos)) != 0) return (ret); } else { - /* first, check to make sure we have room in arrary */ - if (DB_GLOBAL(num_active_pids) + 1 > - DB_GLOBAL(size_active_pids)) { - tmpsize = - DB_GLOBAL(size_active_pids) * sizeof(pid_t); - - /* start with 512, then double if must grow */ - tmpsize = tmpsize>0 ? 
tmpsize*2 : 512; - if ((ret = __os_malloc - (env, tmpsize, &tmparray )) != 0) - return (ret); - - /* if array exists, then copy and free */ - if (DB_GLOBAL(active_pids)) { - memcpy( tmparray, - DB_GLOBAL(active_pids), - DB_GLOBAL(num_active_pids) * - sizeof(pid_t)); - __os_free( env, DB_GLOBAL(active_pids)); - } - - DB_GLOBAL(active_pids) = tmparray; - DB_GLOBAL(size_active_pids) = tmpsize; - - /* - * The process getting here has not been added - * to the DB_REGISTER file yet, so include it - * as the first item in array - */ - if (DB_GLOBAL(num_active_pids) == 0) { - pid = (pid_t)strtoul(my_pid, NULL, 10); - DB_GLOBAL(active_pids) - [DB_GLOBAL(num_active_pids)++] = pid; - } - } - - /* insert into array */ - pid = (pid_t)strtoul(buf, NULL, 10); - DB_GLOBAL(active_pids) - [DB_GLOBAL(num_active_pids)++] = pid; - + if ((ret = __envreg_add_active_pid(env, buf)) != 0) + return (ret); } } /* lets sort the array to allow for binary search in isalive func */ - qsort(DB_GLOBAL(active_pids), DB_GLOBAL(num_active_pids), + qsort(env->active_pids, env->num_active_pids, sizeof(pid_t), __envreg_pid_compare); return (ret); } + +/* + * __envreg_add_active_pid -- + * Add an active pid into array, if need more room in array then double size. + */ +static int +__envreg_add_active_pid(env, pid) + ENV *env; + char *pid; +{ + int ret; + size_t tmpsize; + + ret = 0; + + /* first, check to make sure we have room in arrary */ + if (env->num_active_pids + 1 > + env->size_active_pids) { + tmpsize = + env->size_active_pids * sizeof(pid_t); + + /* start with 512, then double if must grow */ + tmpsize = tmpsize > 0 ? 
tmpsize * 2 : 512; + if ((ret = __os_realloc + (env, tmpsize, &(env->active_pids) )) != 0) + return (ret); + + env->size_active_pids = tmpsize / sizeof(pid_t); + } + + /* insert into array */ + env->active_pids + [env->num_active_pids++] = (pid_t)strtoul(pid, NULL, 10); + + return (0); +} diff --git a/src/env/env_sig.c b/src/env/env_sig.c index 6d127f85..57e64228 100644 --- a/src/env/env_sig.c +++ b/src/env/env_sig.c @@ -28,9 +28,9 @@ * shared memory. */ #ifdef HAVE_MIXED_SIZE_ADDRESSING -#define __STRUCTURE_COUNT 41 +#define __STRUCTURE_COUNT 48 #else -#define __STRUCTURE_COUNT (41 + 104) +#define __STRUCTURE_COUNT (48 + 108) #endif /* @@ -66,7 +66,11 @@ __env_struct_sig() __ADD(__db_h_stat); __ADD(__db_heap_stat); __ADD(__db_qam_stat); +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__mutex_state); +#endif __ADD(__db_thread_info); + __ADD(__env_thread_info); __ADD(__db_lockregion); __ADD(__sh_dbt); __ADD(__db_lockobj); @@ -82,6 +86,9 @@ __env_struct_sig() __ADD(__db_mutexregion); #endif #ifdef HAVE_MUTEX_SUPPORT + __ADD(__mutex_history); +#endif +#ifdef HAVE_MUTEX_SUPPORT __ADD(__db_mutex_t); #endif __ADD(__db_reg_env); @@ -92,6 +99,10 @@ __env_struct_sig() #ifndef HAVE_MIXED_SIZE_ADDRESSING __ADD(__db_dbt); +#ifdef HAVE_MUTEX_SUPPORT + __ADD(__db_event_mutex_died_info); +#endif + __ADD(__db_event_failchk_info); __ADD(__db_lockreq); __ADD(__db_log_cursor); __ADD(__log_rec_spec); @@ -113,6 +124,7 @@ __env_struct_sig() __ADD(__cq_fq); __ADD(__cq_aq); __ADD(__cq_jq); + __ADD(__db_stream); __ADD(__db_heap_rid); __ADD(__dbc); __ADD(__key_range); @@ -125,7 +137,6 @@ __env_struct_sig() __ADD(__fn); __ADD(__db_msgbuf); __ADD(__pin_list); - __ADD(__env_thread_info); __ADD(__flag_map); __ADD(__db_backup_handle); __ADD(__env); diff --git a/src/env/env_stat.c b/src/env/env_stat.c index 9bc3fe7e..094d0545 100644 --- a/src/env/env_stat.c +++ b/src/env/env_stat.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -21,11 +21,9 @@ static int __env_print_dbenv_all __P((ENV *, u_int32_t)); static int __env_print_env_all __P((ENV *, u_int32_t)); static int __env_print_fh __P((ENV *)); static int __env_print_stats __P((ENV *, u_int32_t)); -static int __env_print_thread __P((ENV *)); static int __env_stat_print __P((ENV *, u_int32_t)); static char *__env_thread_state_print __P((DB_THREAD_STATE)); -static const char * - __reg_type __P((reg_type_t)); +static const char * __reg_type __P((reg_type_t)); /* * __env_stat_print_pp -- @@ -146,7 +144,6 @@ __env_stat_print(env, flags) /* * __env_print_stats -- * Display the default environment statistics. - * */ static int __env_print_stats(env, flags) @@ -186,6 +183,10 @@ __env_print_stats(env, flags) (u_long)0, (u_long)0, (u_long)infop->rp->size); __db_dlbytes(env, "Maximum region size", (u_long)0, (u_long)0, (u_long)infop->rp->max); + STAT_LONG("Process failure detected", renv->failure_panic); + if (renv->failure_symptom[0] != '\0') + __db_msg(env, + "%s:\tFirst failure symptom", renv->failure_symptom); return (0); } @@ -267,8 +268,6 @@ __env_print_dbenv_all(env, flags) __db_msg(env, "%s", DB_GLOBAL(db_line)); STAT_POINTER("ENV", dbenv->env); - __mutex_print_debug_single( - env, "DB_ENV handle mutex", dbenv->mtx_db_env, flags); STAT_ISSET("Errcall", dbenv->db_errcall); STAT_ISSET("Errfile", dbenv->db_errfile); STAT_STRING("Errpfx", dbenv->db_errpfx); @@ -286,6 +285,7 @@ __env_print_dbenv_all(env, flags) STAT_ISSET("ThreadId", dbenv->thread_id); STAT_ISSET("ThreadIdString", dbenv->thread_id_string); + STAT_STRING("Blob dir", dbenv->db_blob_dir); STAT_STRING("Log dir", dbenv->db_log_dir); STAT_STRING("Metadata dir", dbenv->db_md_dir); STAT_STRING("Tmp dir", dbenv->db_tmp_dir); @@ -304,6 +304,8 @@ __env_print_dbenv_all(env, flags) STAT_ISSET("Password", 
dbenv->passwd); + STAT_ULONG("Blob threshold", dbenv->blob_threshold); + STAT_ISSET("App private", dbenv->app_private); STAT_ISSET("Api1 internal", dbenv->api1_internal); STAT_ISSET("Api2 internal", dbenv->api2_internal); @@ -314,6 +316,7 @@ __env_print_dbenv_all(env, flags) STAT_ULONG("Mutex cnt", dbenv->mutex_cnt); STAT_ULONG("Mutex inc", dbenv->mutex_inc); STAT_ULONG("Mutex tas spins", dbenv->mutex_tas_spins); + STAT_LONG("Mutex failchk timeout", dbenv->mutex_failchk_timeout); STAT_ISSET("Lock conflicts", dbenv->lk_conflicts); STAT_LONG("Lock modes", dbenv->lk_modes); @@ -356,6 +359,7 @@ __env_print_dbenv_all(env, flags) __db_prflags(env, NULL, dbenv->flags, db_env_fn, NULL, "\tPublic environment flags"); + COMPQUIET(flags, 0); return (0); } @@ -507,6 +511,8 @@ __env_thread_state_print(state) return ("blocked and dead"); case THREAD_OUT: return ("out"); + case THREAD_VERIFY: + return ("verify"); default: return ("unknown"); } @@ -516,14 +522,17 @@ __env_thread_state_print(state) /* * __env_print_thread -- * Display the thread block state. 
+ * + * PUBLIC: int __env_print_thread __P((ENV *)); */ -static int +int __env_print_thread(env) ENV *env; { BH *bhp; DB_ENV *dbenv; DB_HASHTAB *htab; + DB_LOCKER *locker; DB_MPOOL *dbmp; DB_THREAD_INFO *ip; PIN_LIST *list, *lp; @@ -532,6 +541,7 @@ __env_print_thread(env) THREAD_INFO *thread; u_int32_t i; char buf[DB_THREADID_STRLEN]; + char time_buf[CTIME_BUFLEN]; dbenv = env->dbenv; @@ -561,6 +571,10 @@ __env_print_thread(env) dbenv->thread_id_string( dbenv, ip->dbth_pid, ip->dbth_tid, buf), __env_thread_state_print(ip->dbth_state)); + if (timespecisset(&ip->dbth_failtime)) + __db_msg(env, "Crashed at %s", + __db_ctimespec(&ip->dbth_failtime, + time_buf)); list = R_ADDR(env->reginfo, ip->dbth_pinlist); for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) { if (lp->b_ref == INVALID_ROFF) @@ -570,6 +584,18 @@ __env_print_thread(env) __db_msg(env, "\t\tpins: %lu", (u_long)bhp->pgno); } + if (ip->dbth_local_locker != INVALID_ROFF) { + locker = (DB_LOCKER *) + R_ADDR(&env->lk_handle->reginfo, + ip->dbth_local_locker); + __db_msg(env, "\t\tcached locker %lx mtx %lu", + (u_long)locker->id, + (u_long)locker->mtx_locker); + + } +#ifdef HAVE_MUTEX_SUPPORT + (void)__mutex_record_print(env, ip); +#endif } return (0); } @@ -846,6 +872,7 @@ __reg_type(t) return ("Transaction"); case INVALID_REGION_TYPE: return ("Invalid"); + /*lint -e{787} */ } return ("Unknown"); } |