summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Oleksandr Byelkin <sanja@mariadb.com> 2020-01-19 14:37:25 +0100
committer Oleksandr Byelkin <sanja@mariadb.com> 2020-01-19 14:37:25 +0100
commit 7993f893b844d71183956ee30a6fb37fc10ae90c (patch)
tree 7e2629c2830a3891fbf963a6874e195f2d9153ec
parent 10eacd5ff71dc6cf04d0b7ee85d63f4393093a49 (diff)
parent 6cb208107e6f3225ee6f0572a49a558c54a3bbe9 (diff)
download mariadb-git-7993f893b844d71183956ee30a6fb37fc10ae90c.tar.gz
Merge branch 'merge-tokudb-5.6' into 10.1
-rw-r--r-- storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc | 6
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc | 10
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-flusher.cc | 44
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-internal.h | 21
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-ops.cc | 56
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-ops.h | 4
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-recount-rows.cc | 2
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-test-helpers.cc | 2
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft-verify.cc | 2
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft.cc | 28
-rw-r--r-- storage/tokudb/PerconaFT/ft/ft.h | 10
-rw-r--r-- storage/tokudb/PerconaFT/ft/logger/logger.cc | 12
-rw-r--r-- storage/tokudb/PerconaFT/ft/node.cc | 18
-rw-r--r-- storage/tokudb/PerconaFT/ft/node.h | 21
-rw-r--r-- storage/tokudb/PerconaFT/ft/serialize/block_table.cc | 4
-rw-r--r-- storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc | 2
-rw-r--r-- storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc | 2
-rw-r--r-- storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc | 12
-rw-r--r-- storage/tokudb/PerconaFT/ft/txn/rollback.cc | 4
-rw-r--r-- storage/tokudb/PerconaFT/ft/txn/txn.cc | 4
-rw-r--r-- storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc | 9
-rw-r--r-- storage/tokudb/PerconaFT/src/ydb.cc | 58
-rw-r--r-- storage/tokudb/PerconaFT/src/ydb_db.cc | 8
-rw-r--r-- storage/tokudb/PerconaFT/src/ydb_db.h | 1
-rw-r--r-- storage/tokudb/PerconaFT/tools/tokuftdump.cc | 2
-rw-r--r-- storage/tokudb/ha_tokudb.cc | 4
-rw-r--r-- storage/tokudb/tokudb_status.h | 4
27 files changed, 239 insertions, 111 deletions
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
index d97d8762252..8e9856b4060 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
@@ -224,6 +224,9 @@ uint32_t toku_get_checkpoint_period_unlocked (CACHETABLE ct) {
}
void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period) {
+ if(force_recovery) {
+ return;
+ }
ct->cl.set_period(new_period);
}
@@ -3025,9 +3028,12 @@ int toku_cleaner_thread (void *cleaner_v) {
//
ENSURE_POD(cleaner);
+extern uint force_recovery;
+
int cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) {
// default is no cleaner, for now
m_cleaner_cron_init = false;
+ if (force_recovery) return 0;
int r = toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this);
if (r == 0) {
m_cleaner_cron_init = true;
diff --git a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
index 35ba864b9ea..ab9802e88b0 100644
--- a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
@@ -72,7 +72,7 @@ cachetable_put_empty_node_with_dep_nodes(
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
- dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
+ dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty();
}
toku_cachetable_put_with_dep_pairs(
@@ -252,7 +252,7 @@ toku_pin_ftnode_for_query(
// written out, it would have to be dirtied. That
// requires a write lock, and a write lock requires you to
// resolve checkpointing.
- if (!node->dirty) {
+ if (!node->dirty()) {
toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read);
}
}
@@ -279,7 +279,7 @@ toku_pin_ftnode_with_dep_nodes(
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
- dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
+ dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty();
}
int r = toku_cachetable_get_and_pin_with_dep_pairs(
@@ -332,7 +332,7 @@ cleanup:
void toku_unpin_ftnode(FT ft, FTNODE node) {
int r = toku_cachetable_unpin(ft->cf,
node->ct_pair,
- static_cast<enum cachetable_dirty>(node->dirty),
+ static_cast<enum cachetable_dirty>(node->dirty()),
make_ftnode_pair_attr(node));
invariant_zero(r);
}
@@ -343,7 +343,7 @@ toku_unpin_ftnode_read_only(FT ft, FTNODE node)
int r = toku_cachetable_unpin(
ft->cf,
node->ct_pair,
- (enum cachetable_dirty) node->dirty,
+ (enum cachetable_dirty) node->dirty(),
make_invalid_pair_attr()
);
assert(r==0);
diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
index e6452f60cfc..8e687d4ae58 100644
--- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
@@ -138,7 +138,7 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT ft)
// up to date.
if (child->n_children > 1 &&
child->height == 0 &&
- !child->dirty) {
+ !child->dirty()) {
for (int i = 0; i < child->n_children; ++i) {
if (BP_STATE(child, i) == PT_AVAIL &&
node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn)
@@ -479,7 +479,7 @@ handle_split_of_child(
}
)
- node->dirty = 1;
+ node->set_dirty();
XREALLOC_N(node->n_children+1, node->bp);
// Slide the children over.
@@ -661,8 +661,8 @@ static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_
// The new node in the split inherits the oldest known reference xid
B->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
- node->dirty = 1;
- B->dirty = 1;
+ node->set_dirty();
+ B->set_dirty();
}
void
@@ -1002,8 +1002,8 @@ flush_this_child(
paranoid_invariant(child->blocknum.b!=0);
// VERIFY_NODE does not work off client thread as of now
//VERIFY_NODE(t, child);
- node->dirty = 1;
- child->dirty = 1;
+ node->set_dirty();
+ child->set_dirty();
BP_WORKDONE(node, childnum) = 0; // this buffer is drained, no work has been done by its contents
NONLEAF_CHILDINFO bnc = BNC(node, childnum);
@@ -1033,8 +1033,8 @@ merge_leaf_nodes(FTNODE a, FTNODE b)
// TODO(leif): this is no longer the way in_memory_stats is
// maintained. verify that it's ok to move this just before the unpin
// and then do that.
- a->dirty = 1;
- b->dirty = 1;
+ a->set_dirty();
+ b->set_dirty();
bn_data* a_last_bd = BLB_DATA(a, a->n_children-1);
// this bool states if the last basement node in a has any items or not
@@ -1166,8 +1166,8 @@ maybe_merge_pinned_nonleaf_nodes(
a->n_children = new_n_children;
b->n_children = 0;
- a->dirty = 1;
- b->dirty = 1;
+ a->set_dirty();
+ b->set_dirty();
*did_merge = true;
*did_rebalance = false;
@@ -1210,7 +1210,7 @@ maybe_merge_pinned_nodes(
toku_ftnode_assert_fully_in_memory(parent);
toku_ftnode_assert_fully_in_memory(a);
toku_ftnode_assert_fully_in_memory(b);
- parent->dirty = 1; // just to make sure
+ parent->set_dirty(); // just to make sure
{
MSN msna = a->max_msn_applied_to_node_on_disk;
MSN msnb = b->max_msn_applied_to_node_on_disk;
@@ -1334,8 +1334,8 @@ ft_merge_child(
}
paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->blocknum.b);
- childa->dirty = 1; // just to make sure
- childb->dirty = 1; // just to make sure
+ childa->set_dirty(); // just to make sure
+ childb->set_dirty(); // just to make sure
} else {
// flow will be inaccurate for a while, oh well. the children
// are leaves in this case so it's not a huge deal (we're
@@ -1344,7 +1344,7 @@ ft_merge_child(
// If we didn't merge the nodes, then we need the correct pivot.
invariant_notnull(splitk.data);
node->pivotkeys.replace_at(&splitk, childnuma);
- node->dirty = 1;
+ node->set_dirty();
}
toku_destroy_dbt(&splitk);
}
@@ -1368,7 +1368,7 @@ ft_merge_child(
call_flusher_thread_callback(ft_flush_aflter_merge);
// unlock the parent
- paranoid_invariant(node->dirty);
+ paranoid_invariant(node->dirty());
toku_unpin_ftnode(ft, node);
}
else {
@@ -1376,7 +1376,7 @@ ft_merge_child(
call_flusher_thread_callback(ft_flush_aflter_rebalance);
// unlock the parent
- paranoid_invariant(node->dirty);
+ paranoid_invariant(node->dirty());
toku_unpin_ftnode(ft, node);
toku_unpin_ftnode(ft, childb);
}
@@ -1438,9 +1438,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// only do the following work if there is a flush to perform
if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) {
- if (!parent->dirty) {
+ if (!parent->dirty()) {
dirtied++;
- parent->dirty = 1;
+ parent->set_dirty();
}
// detach buffer
BP_WORKDONE(parent, childnum) = 0; // this buffer is drained, no work has been done by its contents
@@ -1485,9 +1485,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// in the buffer to flush, and as a result, flushing is not necessary
// and bnc is NULL
if (bnc != NULL) {
- if (!child->dirty) {
+ if (!child->dirty()) {
dirtied++;
- child->dirty = 1;
+ child->set_dirty();
}
// do the actual flush
toku_bnc_flush_to_child(
@@ -1786,7 +1786,7 @@ static void flush_node_fun(void *fe_v)
// read them back in, or just do the regular partial fetch. If we
// don't, that means fe->node is a parent, so we need to do this anyway.
bring_node_fully_into_memory(fe->node,fe->ft);
- fe->node->dirty = 1;
+ fe->node->set_dirty();
struct flusher_advice fa;
struct flush_status_update_extra fste;
@@ -1892,7 +1892,7 @@ void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent)
//
// can detach buffer and unpin root here
//
- parent->dirty = 1;
+ parent->set_dirty();
BP_WORKDONE(parent, childnum) = 0; // this buffer is drained, no work has been done by its contents
NONLEAF_CHILDINFO bnc = BNC(parent, childnum);
NONLEAF_CHILDINFO new_bnc = toku_create_empty_nl();
diff --git a/storage/tokudb/PerconaFT/ft/ft-internal.h b/storage/tokudb/PerconaFT/ft/ft-internal.h
index eec591d1744..130d3c302aa 100644
--- a/storage/tokudb/PerconaFT/ft/ft-internal.h
+++ b/storage/tokudb/PerconaFT/ft/ft-internal.h
@@ -76,11 +76,30 @@ enum ft_type {
FT_CHECKPOINT_INPROGRESS
};
+extern "C" {
+extern uint force_recovery;
+}
+
+extern int writing_rollback;
+
// The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
struct ft_header {
enum ft_type type;
- int dirty;
+ int dirty_;
+
+ void set_dirty() {
+ if(force_recovery) assert(writing_rollback);
+ dirty_ = 1;
+ }
+
+ void clear_dirty() {
+ dirty_ = 0;
+ }
+
+ bool dirty() {
+ return dirty_;
+ }
// Free-running counter incremented once per checkpoint (toggling LSB).
// LSB indicates which header location is used on disk so this
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index 6d39b08fe02..d2e92768dde 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -655,7 +655,7 @@ void toku_ftnode_clone_callback(void *value_data,
node->layout_version_read_from_disk;
cloned_node->build_id = node->build_id;
cloned_node->height = node->height;
- cloned_node->dirty = node->dirty;
+ cloned_node->dirty_ = node->dirty_;
cloned_node->fullhash = node->fullhash;
cloned_node->n_children = node->n_children;
@@ -671,8 +671,8 @@ void toku_ftnode_clone_callback(void *value_data,
toku_ftnode_clone_partitions(node, cloned_node);
// clear dirty bit
- node->dirty = 0;
- cloned_node->dirty = 0;
+ node->clear_dirty();
+ cloned_node->clear_dirty();
node->layout_version_read_from_disk = FT_LAYOUT_VERSION;
// set new pair attr if necessary
if (node->height == 0) {
@@ -741,7 +741,7 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
// persisted, we need undo the logical row count adjustments as
// they may occur again in the future if/when the node is
// re-read from disk for another query or change.
- if (!ftnode->dirty && !write_me) {
+ if (!ftnode->dirty() && !write_me) {
int64_t lrc_delta = 0;
for (int i = 0; i < ftnode->n_children; i++) {
if (BP_STATE(ftnode, i) == PT_AVAIL) {
@@ -846,8 +846,8 @@ int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile),
if (r == 0) {
*sizep = make_ftnode_pair_attr(*node);
(*node)->ct_pair = p;
- *dirtyp = (*node)->dirty; // deserialize could mark the node as dirty
- // (presumably for upgrade)
+ *dirtyp = (*node)->dirty(); // deserialize could mark the node as dirty
+ // (presumably for upgrade)
}
return r;
}
@@ -869,7 +869,7 @@ void toku_ftnode_pe_est_callback(
paranoid_invariant(ftnode_pv != NULL);
long bytes_to_free = 0;
FTNODE node = static_cast<FTNODE>(ftnode_pv);
- if (node->dirty || node->height == 0 ||
+ if (node->dirty() || node->height == 0 ||
node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
*bytes_freed_estimate = 0;
*cost = PE_CHEAP;
@@ -946,7 +946,7 @@ int toku_ftnode_pe_callback(void *ftnode_pv,
void *pointers_to_free[node->n_children * 2];
// Don't partially evict dirty nodes
- if (node->dirty) {
+ if (node->dirty()) {
goto exit;
}
// Don't partially evict nodes whose partitions can't be read back
@@ -1399,7 +1399,7 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp)
MSN msna = oldroot->max_msn_applied_to_node_on_disk;
newroot->max_msn_applied_to_node_on_disk = msna;
BP_STATE(newroot,0) = PT_AVAIL;
- newroot->dirty = 1;
+ newroot->set_dirty();
// Set the first child to have the new blocknum,
// and then swap newroot with oldroot. The new root
@@ -1487,7 +1487,7 @@ static void inject_message_in_locked_node(
// mark the node as dirty.
// enforcing invariant here.
//
- paranoid_invariant(node->dirty != 0);
+ paranoid_invariant(node->dirty() != 0);
// update some status variables
if (node->height != 0) {
@@ -1847,7 +1847,7 @@ static void push_something_in_subtree(
}
}
- if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) {
+ if (next_loc != NEITHER_EXTREME || child->dirty() || toku_bnc_should_promote(ft, bnc)) {
push_something_in_subtree(ft, child, -1, msg, flow_deltas, gc_info, depth + 1, next_loc, false);
toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]);
// The recursive call unpinned the child, but
@@ -2802,9 +2802,9 @@ static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp)
}
// open a file for use by the ft. if the file does not exist, error
-static int ft_open_file(const char *fname, int *fdp) {
+static int ft_open_file(const char *fname, int *fdp, bool rw) {
int fd;
- fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode);
+ fd = ft_open_maybe_direct(fname, (rw ? O_RDWR : O_RDONLY) | O_BINARY, file_mode);
if (fd==-1) {
return get_error_errno();
}
@@ -2955,7 +2955,7 @@ toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) {
// The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn .
// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring.
static int
-ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) {
+ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn, bool open_rw = true) {
int r;
bool txn_created = false;
char *fname_in_cwd = NULL;
@@ -2977,7 +2977,7 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only
fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env);
{
int fd = -1;
- r = ft_open_file(fname_in_cwd, &fd);
+ r = ft_open_file(fname_in_cwd, &fd, open_rw);
if (reserved_filenum.fileid == FILENUM_NONE.fileid) {
reserved_filenum = toku_cachetable_reserve_filenum(cachetable);
}
@@ -3123,15 +3123,15 @@ toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_creat
// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function.
// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring.
int
-toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) {
+toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, bool open_rw) {
int r;
- r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN);
+ r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN, open_rw);
return r;
}
// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree
int
-toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) {
+toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw) {
FT_HANDLE result_ft_handle;
toku_ft_handle_create(&result_ft_handle);
@@ -3146,7 +3146,7 @@ toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN t
CACHEFILE cf = ft_handle->ft->cf;
CACHETABLE ct = toku_cachefile_get_cachetable(cf);
const char *fname_in_env = toku_cachefile_fname_in_env(cf);
- int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn);
+ int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn, open_rw);
if (r != 0) {
toku_ft_handle_close(result_ft_handle);
result_ft_handle = NULL;
@@ -3547,7 +3547,7 @@ unlock_ftnode_fun (void *v) {
int r = toku_cachetable_unpin_ct_prelocked_no_flush(
ft_handle->ft->cf,
node->ct_pair,
- (enum cachetable_dirty) node->dirty,
+ (enum cachetable_dirty) node->dirty(),
x->msgs_applied ? make_ftnode_pair_attr(node) : make_invalid_pair_attr()
);
assert_zero(r);
@@ -4969,6 +4969,14 @@ static void toku_pfs_keys_destroy(void) {
}
int toku_ft_layer_init(void) {
+ static bool ft_layer_init_started = false;
+
+ if(ft_layer_init_started) {
+ return 0;
+ }
+
+ ft_layer_init_started = true;
+
int r = 0;
// Portability must be initialized first
@@ -4999,6 +5007,14 @@ exit:
}
void toku_ft_layer_destroy(void) {
+ static bool ft_layer_destroy_started = false;
+
+ if(ft_layer_destroy_started) {
+ return;
+ }
+
+ ft_layer_destroy_started = true;
+
toku_mutex_destroy(&ft_open_close_lock);
toku_ft_serialize_layer_destroy();
toku_checkpoint_destroy();
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h
index df8ffe287df..7b6d0634c37 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.h
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.h
@@ -125,12 +125,12 @@ typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const
void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun);
int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env,
- int is_create, int only_create, CACHETABLE ct, TOKUTXN txn) __attribute__ ((warn_unused_result));
+ int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, bool open_rw=true) __attribute__ ((warn_unused_result));
int toku_ft_handle_open_recovery(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn,
FILENUM use_filenum, LSN max_acceptable_lsn) __attribute__ ((warn_unused_result));
// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree
-int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn);
+int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw=true);
// close an ft handle during normal operation. the underlying ft may or may not close,
// depending if there are still references. an lsn for this close will come from the logger.
diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
index e31d80772d5..3b5501b66d3 100644
--- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
@@ -98,7 +98,7 @@ int toku_ft_recount_rows(FT_HANDLE ft,
if (rre._cancelled == false) {
// update ft count
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
- ft->ft->h->dirty = 1;
+ ft->ft->h->set_dirty();
ret = 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
index 930fb3013d2..8338a0777eb 100644
--- a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
@@ -258,7 +258,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en
// is directly queueing something in a FIFO instead of
// using ft APIs.
node->max_msn_applied_to_node_on_disk = msn;
- node->dirty = 1;
+ node->set_dirty();
// Also hack max_msn_in_ft
ft_handle->ft->h->max_msn_in_ft = msn;
diff --git a/storage/tokudb/PerconaFT/ft/ft-verify.cc b/storage/tokudb/PerconaFT/ft/ft-verify.cc
index a2835f730eb..3819799c32f 100644
--- a/storage/tokudb/PerconaFT/ft/ft-verify.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-verify.cc
@@ -511,7 +511,7 @@ toku_verify_ft_with_progress (FT_HANDLE ft_handle, int (*progress_callback)(void
if (r == 0) {
toku_ft_lock(ft_handle->ft);
ft_handle->ft->h->time_of_last_verification = time(NULL);
- ft_handle->ft->h->dirty = 1;
+ ft_handle->ft->h->set_dirty();
toku_ft_unlock(ft_handle->ft);
}
return r;
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 454bf11794f..5c9f27bf5ad 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -60,7 +60,7 @@ void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) {
// (see cooperative use of dirty bit in ft_begin_checkpoint())
toku_ft_lock(ft);
ft->h->root_xid_that_created = new_root_xid_that_created;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -146,7 +146,7 @@ static void ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) {
assert(ft->h->type == FT_CURRENT);
assert(ft->checkpoint_header == NULL);
ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn);
- ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader)
+ ft->h->clear_dirty(); // this is only place this bit is cleared (in currentheader)
ft->blocktable.note_start_checkpoint_unlocked();
toku_ft_unlock (ft);
}
@@ -185,7 +185,7 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
FT_HEADER ch = ft->checkpoint_header;
assert(ch);
assert(ch->type == FT_CHECKPOINT_INPROGRESS);
- if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header)
+ if (ch->dirty()) { // this is only place this bit is tested (in checkpoint_header)
TOKULOGGER logger = toku_cachefile_logger(cf);
if (logger) {
toku_logger_fsync_if_lsn_not_fsynced(logger, ch->checkpoint_lsn);
@@ -200,7 +200,7 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
// write translation and header to disk (or at least to OS internal buffer)
toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf);
- ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header)
+ ch->clear_dirty(); // this is only place this bit is cleared (in checkpoint_header)
// fsync the cachefile
toku_cachefile_fsync(cf);
@@ -254,7 +254,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
toku_log_fclose(
logger,
&lsn,
- ft->h->dirty,
+ ft->h->dirty(),
bs,
toku_cachefile_filenum(cachefile)); // flush the log on
// close (if new header
@@ -265,7 +265,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
}
}
}
- if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader)
+ if (ft->h->dirty()) { // this is the only place this bit is tested (in currentheader)
bool do_checkpoint = true;
if (logger && logger->rollback_cachefile == cachefile) {
do_checkpoint = false;
@@ -274,7 +274,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
ft_begin_checkpoint(lsn, header_v);
ft_checkpoint(cachefile, fd, ft);
ft_end_checkpoint(cachefile, fd, header_v);
- assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
+ assert(!ft->h->dirty()); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
}
}
}
@@ -370,7 +370,7 @@ ft_header_create(FT_OPTIONS options, BLOCKNUM root_blocknum, TXNID root_xid_that
uint64_t now = (uint64_t) time(NULL);
struct ft_header h = {
.type = FT_CURRENT,
- .dirty = 0,
+ .dirty_ = 0,
.checkpoint_count = 0,
.checkpoint_lsn = ZERO_LSN,
.layout_version = FT_LAYOUT_VERSION,
@@ -521,7 +521,7 @@ toku_ft_note_hot_begin(FT_HANDLE ft_handle) {
toku_ft_lock(ft);
ft->h->time_of_last_optimize_begin = now;
ft->h->count_of_optimize_in_progress++;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -545,7 +545,7 @@ toku_ft_note_hot_complete(FT_HANDLE ft_handle, bool success, MSN msn_at_start_of
if (ft->h->count_of_optimize_in_progress == ft->h->count_of_optimize_in_progress_read_from_disk)
ft->h->count_of_optimize_in_progress = 0;
}
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -958,7 +958,7 @@ void toku_ft_remove_reference(
void toku_ft_set_nodesize(FT ft, unsigned int nodesize) {
toku_ft_lock(ft);
ft->h->nodesize = nodesize;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -971,7 +971,7 @@ void toku_ft_get_nodesize(FT ft, unsigned int *nodesize) {
void toku_ft_set_basementnodesize(FT ft, unsigned int basementnodesize) {
toku_ft_lock(ft);
ft->h->basementnodesize = basementnodesize;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -984,7 +984,7 @@ void toku_ft_get_basementnodesize(FT ft, unsigned int *basementnodesize) {
void toku_ft_set_compression_method(FT ft, enum toku_compression_method method) {
toku_ft_lock(ft);
ft->h->compression_method = method;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
@@ -997,7 +997,7 @@ void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp
void toku_ft_set_fanout(FT ft, unsigned int fanout) {
toku_ft_lock(ft);
ft->h->fanout = fanout;
- ft->h->dirty = 1;
+ ft->h->set_dirty();
toku_ft_unlock(ft);
}
diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h
index ff0b63b2b12..5c6caead978 100644
--- a/storage/tokudb/PerconaFT/ft/ft.h
+++ b/storage/tokudb/PerconaFT/ft/ft.h
@@ -184,11 +184,11 @@ void tokuft_update_product_name_strings(void);
extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH];
struct toku_product_name_strings_struct {
- char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256];
- char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment")];
- char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory")];
- char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me")];
- char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback")];
+ char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256 + 1];
+ char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment") + 1];
+ char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory") + 1];
+ char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me") + 1];
+ char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback") + 1];
};
extern struct toku_product_name_strings_struct toku_product_name_strings;
diff --git a/storage/tokudb/PerconaFT/ft/logger/logger.cc b/storage/tokudb/PerconaFT/ft/logger/logger.cc
index ddbbdcb25ab..5b2d1492cc9 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logger.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logger.cc
@@ -49,6 +49,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "util/status.h"
+int writing_rollback = 0;
+
static const int log_format_version = TOKU_LOG_VERSION;
toku_instr_key *result_output_condition_lock_mutex_key;
@@ -231,6 +233,7 @@ void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) {
}
int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) {
+ writing_rollback++;
assert(logger->is_open);
assert(!logger->rollback_cachefile);
@@ -250,6 +253,7 @@ int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool cre
} else {
toku_ft_handle_close(ft_handle);
}
+ writing_rollback--;
return r;
}
@@ -267,20 +271,20 @@ void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdo
FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf));
if (clean_shutdown) {
//Verify it is safe to close it.
- assert(!ft->h->dirty); //Must not be dirty.
+ assert(!ft->h->dirty()); //Must not be dirty.
ft->blocktable.free_unused_blocknums(ft->h->root_blocknum);
// Must have no data blocks (rollback logs or otherwise).
ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum);
- assert(!ft->h->dirty);
+ assert(!ft->h->dirty());
} else {
- ft->h->dirty = 0;
+ ft->h->clear_dirty();
}
ft_to_close = toku_ft_get_only_existing_ft_handle(ft);
if (clean_shutdown) {
bool is_empty;
is_empty = toku_ft_is_empty_fast(ft_to_close);
assert(is_empty);
- assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test.
+ assert(!ft->h->dirty()); // it should not have been dirtied by the toku_ft_is_empty test.
}
}
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 27943496fbf..39a76c8615e 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -77,7 +77,7 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n
}
}
}
- n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty
+ n->set_dirty(); // special case exception, it's okay to mark as dirty because the basements are empty
toku_ft_status_note_ftnode(height, true);
}
@@ -153,7 +153,7 @@ void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) {
void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft) {
// free the basement node
- assert(!node->dirty);
+ assert(!node->dirty());
BASEMENTNODE bn = BLB(node, childnum);
toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
toku_ft_adjust_logical_row_count(ft, -BLB_LRD(node, childnum));
@@ -595,7 +595,7 @@ toku_apply_ancestors_messages_to_node (
oldest_referenced_xid_for_simple_gc,
node->oldest_referenced_xid_known,
true);
- if (!node->dirty && child_to_read >= 0) {
+ if (!node->dirty() && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
apply_ancestors_messages_to_bn(
t,
@@ -712,7 +712,7 @@ bool toku_ft_leaf_needs_ancestors_messages(
paranoid_invariant(node->height == 0);
bool needs_ancestors_messages = false;
// child_to_read may be -1 in test cases
- if (!node->dirty && child_to_read >= 0) {
+ if (!node->dirty() && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
needs_ancestors_messages = bn_needs_ancestors_messages(
ft,
@@ -745,7 +745,7 @@ cleanup:
void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) {
invariant(node->height == 0);
- if (!node->dirty && child_to_read >= 0) {
+ if (!node->dirty() && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
BASEMENTNODE bn = BLB(node, child_to_read);
if (max_msn_applied.msn > bn->max_msn_applied.msn) {
@@ -832,7 +832,7 @@ struct rebalance_array_info {
void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) {
assert(node->height == 0);
- assert(node->dirty);
+ assert(node->dirty());
uint32_t num_orig_basements = node->n_children;
// Count number of leaf entries in this leaf (num_le).
@@ -1141,7 +1141,7 @@ void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey
invariant(childnum > 0);
node->pivotkeys.insert_at(pivotkey, childnum - 1);
}
- node->dirty = 1;
+ node->set_dirty();
}
void
@@ -1744,7 +1744,7 @@ static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE no
int childnum, const ft_msg &msg, bool is_fresh) {
paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL);
bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp);
- node->dirty = 1;
+ node->set_dirty();
}
// This is only exported for tests.
@@ -2089,7 +2089,7 @@ void toku_ft_leaf_apply_msg(
// be reapplied later), we mark the node as dirty and
// take the opportunity to update node->max_msn_applied_to_node_on_disk.
//
- node->dirty = 1;
+ node->set_dirty();
//
// we cannot blindly update node->max_msn_applied_to_node_on_disk,
diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h
index 05c8a44ebed..61093f3ed8d 100644
--- a/storage/tokudb/PerconaFT/ft/node.h
+++ b/storage/tokudb/PerconaFT/ft/node.h
@@ -155,6 +155,12 @@ private:
size_t _total_size;
};
+extern int writing_rollback;
+
+extern "C" {
+extern uint force_recovery;
+}
+
// TODO: class me up
struct ftnode {
// max_msn_applied that will be written to disk
@@ -173,9 +179,22 @@ struct ftnode {
uint32_t build_id;
// height is always >= 0. 0 for leaf, >0 for nonleaf.
int height;
- int dirty;
+ int dirty_;
uint32_t fullhash;
+ void set_dirty() { // marks this node dirty by setting dirty_ to 1
+ if(force_recovery) assert(writing_rollback); // with force_recovery nonzero, dirtying is only expected while writing_rollback is nonzero
+ dirty_ = 1;
+ }
+
+ void clear_dirty() { // marks this node clean by resetting dirty_ to 0; unlike set_dirty(), performs no force_recovery check
+ dirty_ = 0;
+ }
+
+ bool dirty() { // read accessor: true iff dirty_ is nonzero
+ return dirty_;
+ }
+
// for internal nodes, if n_children==fanout+1 then the tree needs to be
// rebalanced. for leaf nodes, represents number of basement nodes
int n_children;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
index 56d51f56915..c4c99844edf 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
@@ -195,9 +195,9 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) {
invariant(ft->h->type == FT_CURRENT);
if (for_checkpoint) {
invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
- ft->checkpoint_header->dirty = 1;
+ ft->checkpoint_header->set_dirty();
} else {
- ft->h->dirty = 1;
+ ft->h->set_dirty();
}
}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc
index 02a9dfd085c..de58fb42a8b 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc
@@ -60,7 +60,7 @@ initialize_ftnode(FTNODE node, BLOCKNUM blocknum)
{
node->fullhash = 0xDEADBEEF; // <CER> Is this 'spoof' ok?
node->blocknum = blocknum;
- node->dirty = 0;
+ node->clear_dirty();
node->bp = NULL;
// <CER> Can we use this initialization as a correctness assert in
// a later function?
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
index 0d6573972d7..0813855bf55 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
@@ -340,7 +340,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
{
struct ft_header h = {
.type = FT_CURRENT,
- .dirty = 0,
+ .dirty_ = 0,
.checkpoint_count = checkpoint_count,
.checkpoint_lsn = checkpoint_lsn,
.layout_version = FT_LAYOUT_VERSION,
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index 46bb8f81412..46f2e9600c5 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -827,7 +827,7 @@ int toku_serialize_ftnode_to(int fd,
node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
toku_free(compressed_buf);
- node->dirty = 0; // See #1957. Must set the node to be clean after
+ node->clear_dirty(); // See #1957. Must set the node to be clean after
// serializing it so that it doesn't get written again on
// the next checkpoint or eviction.
if (node->height == 0) {
@@ -1544,7 +1544,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum)
FTNODE XMALLOC(node);
node->fullhash = fullhash;
node->blocknum = blocknum;
- node->dirty = 0;
+ node->clear_dirty();
node->oldest_referenced_xid_known = TXNID_NONE;
node->bp = nullptr;
node->ct_pair = nullptr;
@@ -1951,7 +1951,7 @@ static int deserialize_and_upgrade_internal_node(FTNODE node,
// Assign the highest msn from our upgrade message buffers
node->max_msn_applied_to_node_on_disk = highest_msn;
// Since we assigned MSNs to this node's messages, we need to dirty it.
- node->dirty = 1;
+ node->set_dirty();
// Must compute the checksum now (rather than at the end, while we
// still have the pointer to the buffer).
@@ -2908,9 +2908,9 @@ int toku_serialize_rollback_log_to(int fd,
toku_free(compressed_buf);
if (!is_serialized) {
toku_static_serialized_rollback_log_destroy(&serialized_local);
- log->dirty = 0; // See #1957. Must set the node to be clean after
- // serializing it so that it doesn't get written again
- // on the next checkpoint or eviction.
+ log->dirty = false; // See #1957. Must set the node to be clean after
+ // serializing it so that it doesn't get written again
+ // on the next checkpoint or eviction.
}
return 0;
}
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback.cc b/storage/tokudb/PerconaFT/ft/txn/rollback.cc
index 0c793842f3c..105f980dc0d 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback.cc
@@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/logger/log-internal.h"
#include "ft/txn/rollback-ct-callbacks.h"
+extern int writing_rollback;
+
static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) {
FT CAST_FROM_VOIDP(ft, extra);
ft->blocktable.free_blocknum(cachekey, ft, for_checkpoint);
@@ -155,6 +157,7 @@ static void rollback_log_create (
ROLLBACK_LOG_NODE *result
)
{
+ writing_rollback++;
ROLLBACK_LOG_NODE XMALLOC(log);
rollback_empty_log_init(log);
@@ -169,6 +172,7 @@ static void rollback_log_create (
get_write_callbacks_for_rollback_log(ft),
toku_rollback_node_save_ct_pair);
txn->roll_info.current_rollback = log->blocknum;
+ writing_rollback --;
}
void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
diff --git a/storage/tokudb/PerconaFT/ft/txn/txn.cc b/storage/tokudb/PerconaFT/ft/txn/txn.cc
index 7327cbd9d24..7152833d88d 100644
--- a/storage/tokudb/PerconaFT/ft/txn/txn.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/txn.cc
@@ -723,7 +723,11 @@ time_t toku_txn_get_start_time(struct tokutxn *txn) {
return txn->start_time;
}
+extern uint force_recovery;
int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn, bool is_provisional UU()) {
+ if(force_recovery) {
+ return TOKUDB_ACCEPT;
+ }
int r = 0;
TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(txn);
if (oldest_live_in_snapshot == TXNID_NONE && txnid < txn->snapshot_txnid64) {
diff --git a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc
index 786a6ef0546..0f287429542 100644
--- a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc
+++ b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc
@@ -359,7 +359,16 @@ void toku_instr_rwlock_wrlock_wait_end(
void toku_instr_rwlock_unlock(toku_pthread_rwlock_t &rwlock) {
if (rwlock.psi_rwlock)
+// NOTE: whichever call the #if/#else below selects is the sole body of the brace-less if above.
+// A change introduced in e4148f2a22922687f7652c4e3d21a22da07c9e78 altered
+// the PSI rwlock version and interface.
+// PSI_CURRENT_RWLOCK_VERSION is not defined in MySQL 5.6 and is defined
+// as 1 in 5.7 and < 8.0.17
+#if defined(PSI_CURRENT_RWLOCK_VERSION) && (PSI_CURRENT_RWLOCK_VERSION == 2)
+ PSI_RWLOCK_CALL(unlock_rwlock)(rwlock.psi_rwlock, PSI_RWLOCK_UNLOCK);
+#else // PSI_CURRENT_RWLOCK_VERSION undefined or != 2: single-argument form
+ PSI_RWLOCK_CALL(unlock_rwlock)(rwlock.psi_rwlock);
+#endif // PSI_CURRENT_RWLOCK_VERSION == 2
}
#endif // TOKU_MYSQL_WITH_PFS
diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc
index 8dcbba361b9..4d549c0ac73 100644
--- a/storage/tokudb/PerconaFT/src/ydb.cc
+++ b/storage/tokudb/PerconaFT/src/ydb.cc
@@ -39,6 +39,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
extern const char *toku_patent_string;
const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.";
+
+extern int writing_rollback;
+
#include <db.h>
#include <errno.h>
#include <string.h>
@@ -87,6 +90,10 @@ const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or it
int toku_close_trace_file (void) { return 0; }
#endif
+extern "C" {
+ uint force_recovery = 0;
+}
+
// Set when env is panicked, never cleared.
static int env_is_panicked = 0;
@@ -223,6 +230,9 @@ env_fs_redzone(DB_ENV *env, uint64_t total) {
// Check the available space in the file systems used by tokuft and erect barriers when available space gets low.
static int
env_fs_poller(void *arg) {
+ if(force_recovery == 6) {
+ return 0;
+ }
DB_ENV *env = (DB_ENV *) arg;
int r;
@@ -307,6 +317,9 @@ env_fs_init(DB_ENV *env) {
// Initialize the minicron that polls file system space
static int
env_fs_init_minicron(DB_ENV *env) {
+ if(force_recovery == 6) {
+ return 0;
+ }
int r = toku_minicron_setup(&env->i->fs_poller, env->i->fs_poll_time*1000, env_fs_poller, env);
if (r == 0)
env->i->fs_poller_is_init = true;
@@ -709,7 +722,7 @@ static int validate_env(DB_ENV *env,
}
// Test for fileops directory
- if (r == 0) {
+ if (r == 0 && force_recovery != 6) {
path = toku_construct_full_name(
2, env->i->dir, toku_product_name_strings.fileopsdirectory);
assert(path);
@@ -752,7 +765,7 @@ static int validate_env(DB_ENV *env,
}
// Test for recovery log
- if ((r == 0) && (env->i->open_flags & DB_INIT_LOG)) {
+ if ((r == 0) && (env->i->open_flags & DB_INIT_LOG) && force_recovery != 6) {
// if using transactions, test for existence of log
r = ydb_recover_log_exists(env); // return 0 or ENOENT
if (expect_newenv && (r != ENOENT))
@@ -813,6 +826,27 @@ unlock_single_process(DB_ENV *env) {
// (The set of necessary files is defined in the function validate_env() above.)
static int
env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
+
+ if(force_recovery == 6) {
+ {
+ const int len = strlen(toku_product_name_strings.rollback_cachefile);
+ toku_product_name_strings.rollback_cachefile[len] = '2';
+ toku_product_name_strings.rollback_cachefile[len+1] = 0;
+ }
+
+ {
+ const int len = strlen(toku_product_name_strings.single_process_lock);
+ toku_product_name_strings.single_process_lock[len] = '2';
+ toku_product_name_strings.single_process_lock[len+1] = 0;
+ }
+
+ {
+ const int len = strlen(toku_product_name_strings.environmentdictionary);
+ toku_product_name_strings.environmentdictionary[len] = '2';
+ toku_product_name_strings.environmentdictionary[len+1] = 0;
+ }
+ }
+
HANDLE_PANICKED_ENV(env);
int r;
bool newenv; // true iff creating a new environment
@@ -903,7 +937,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
bool need_rollback_cachefile;
need_rollback_cachefile = false;
- if (flags & (DB_INIT_TXN | DB_INIT_LOG)) {
+ if (flags & (DB_INIT_TXN | DB_INIT_LOG) && force_recovery != 6) {
need_rollback_cachefile = true;
}
@@ -916,7 +950,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
r = ydb_maybe_upgrade_env(env, &last_lsn_of_clean_shutdown_read_from_log, &upgrade_in_progress);
if (r!=0) goto cleanup;
- if (upgrade_in_progress) {
+ if (upgrade_in_progress || force_recovery == 6) {
// Delete old rollback file. There was a clean shutdown, so it has nothing useful,
// and there is no value in upgrading it. It is simpler to just create a new one.
char* rollback_filename = toku_construct_full_name(2, env->i->dir, toku_product_name_strings.rollback_cachefile);
@@ -934,9 +968,13 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
unused_flags &= ~DB_INIT_TXN & ~DB_INIT_LOG;
+ if(force_recovery == 6) {
+ flags |= DB_INIT_LOG | DB_INIT_TXN;
+ }
+
// do recovery only if there exists a log and recovery is requested
// otherwise, a log is created when the logger is opened later
- if (!newenv) {
+ if (!newenv && force_recovery == 0) {
if (flags & DB_INIT_LOG) {
// the log does exist
if (flags & DB_RECOVER) {
@@ -1005,7 +1043,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
assert (using_txns);
toku_logger_set_cachetable(env->i->logger, env->i->cachetable);
if (!toku_logger_rollback_is_open(env->i->logger)) {
- bool create_new_rollback_file = newenv | upgrade_in_progress;
+ bool create_new_rollback_file = newenv | upgrade_in_progress | (force_recovery == 6);
r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, create_new_rollback_file);
if (r != 0) {
r = toku_ydb_do_error(env, r, "Cant open rollback\n");
@@ -1024,6 +1062,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
assert_zero(r);
r = toku_db_use_builtin_key_cmp(env->i->persistent_environment);
assert_zero(r);
+ writing_rollback++;
r = toku_db_open_iname(env->i->persistent_environment, txn, toku_product_name_strings.environmentdictionary, DB_CREATE, mode);
if (r != 0) {
r = toku_ydb_do_error(env, r, "Cant open persistent env\n");
@@ -1056,6 +1095,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
assert_zero(r);
}
capture_persistent_env_contents(env, txn);
+ writing_rollback--;
}
{
r = toku_db_create(&env->i->directory, env, 0);
@@ -1074,8 +1114,10 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) {
txn = NULL;
}
cp = toku_cachetable_get_checkpointer(env->i->cachetable);
- r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, STARTUP_CHECKPOINT);
- assert_zero(r);
+ if (!force_recovery) {
+ r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, STARTUP_CHECKPOINT);
+ }
+ writing_rollback--;
env_fs_poller(env); // get the file system state at startup
r = env_fs_init_minicron(env);
if (r != 0) {
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc
index 40c4a7f6577..ac44b8e7fd3 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_db.cc
@@ -323,6 +323,7 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
// DB_THREAD is implicitly supported and DB_BLACKHOLE is supported at the ft-layer
unused_flags &= ~DB_THREAD;
unused_flags &= ~DB_BLACKHOLE;
+ unused_flags &= ~DB_RDONLY;
// check for unknown or conflicting flags
if (unused_flags) return EINVAL; // unknown flags
@@ -404,7 +405,7 @@ int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra) {
FT_HANDLE ft_handle = info->ft_handle;
FT_HANDLE cloned_ft_handle;
- r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn);
+ r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn, info->open_rw);
if (r == 0) {
assert(lt->get_userdata() == NULL);
lt->set_userdata(cloned_ft_handle);
@@ -465,6 +466,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t
flags&=~DB_READ_COMMITTED;
flags&=~DB_SERIALIZABLE;
flags&=~DB_IS_HOT_INDEX;
+ flags&=~DB_RDONLY;
// unknown or conflicting flags are bad
int unknown_flags = flags & ~DB_THREAD;
unknown_flags &= ~DB_BLACKHOLE;
@@ -479,11 +481,12 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t
db->i->open_flags = flags;
db->i->open_mode = mode;
+ bool open_rw = mode & (S_IWUSR | S_IWOTH | S_IWGRP);
FT_HANDLE ft_handle = db->i->ft_handle;
int r = toku_ft_handle_open(ft_handle, iname_in_env,
is_db_create, is_db_excl,
db->dbenv->i->cachetable,
- txn ? db_txn_struct_i(txn)->tokutxn : nullptr);
+ txn ? db_txn_struct_i(txn)->tokutxn : nullptr, open_rw);
if (r != 0) {
goto out;
}
@@ -505,6 +508,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t
struct lt_on_create_callback_extra on_create_extra = {
.txn = txn,
.ft_handle = db->i->ft_handle,
+ open_rw
};
db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id,
toku_ft_get_comparator(db->i->ft_handle),
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h
index ab8fcd2a401..c260e9d0fbe 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.h
+++ b/storage/tokudb/PerconaFT/src/ydb_db.h
@@ -67,6 +67,7 @@ void ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp);
struct lt_on_create_callback_extra {
DB_TXN *txn;
FT_HANDLE ft_handle;
+ bool open_rw; // set by toku_db_open_iname() from the write-permission bits of `mode`; passed on to toku_ft_handle_clone()
};
int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra);
void toku_db_lt_on_destroy_callback(toku::locktree *lt);
diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
index 2838ae5182e..44edb15162a 100644
--- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc
+++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
@@ -181,7 +181,7 @@ static void dump_header(FT ft) {
printf(" time_of_creation= %" PRIu64 " %s\n", ft->h->time_of_creation, timestr);
format_time(ft->h->time_of_last_modification, timestr);
printf(" time_of_last_modification=%" PRIu64 " %s\n", ft->h->time_of_last_modification, timestr);
- printf(" dirty=%d\n", ft->h->dirty);
+ printf(" dirty=%d\n", ft->h->dirty());
printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count);
printf(" checkpoint_lsn=%" PRId64 "\n", ft->h->checkpoint_lsn.lsn);
printf(" nodesize=%u\n", ft->h->nodesize);
diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc
index a4dc9f6e326..39931e747ce 100644
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -1333,7 +1333,7 @@ int ha_tokudb::open_main_dictionary(
NULL,
DB_BTREE,
open_flags,
- 0);
+ S_IWUSR);
if (error) {
goto exit;
}
@@ -1396,7 +1396,7 @@ int ha_tokudb::open_secondary_dictionary(
}
- error = (*ptr)->open(*ptr, txn, newname, NULL, DB_BTREE, open_flags, 0);
+ error = (*ptr)->open(*ptr, txn, newname, NULL, DB_BTREE, open_flags, S_IWUSR);
if (error) {
my_errno = error;
goto cleanup;
diff --git a/storage/tokudb/tokudb_status.h b/storage/tokudb/tokudb_status.h
index 5cca54e52c9..07772bdc92a 100644
--- a/storage/tokudb/tokudb_status.h
+++ b/storage/tokudb/tokudb_status.h
@@ -201,7 +201,7 @@ int create(
name,
NULL,
DB_BTREE, DB_CREATE | DB_EXCL,
- 0);
+ S_IWUSR);
}
if (error == 0) {
*status_db_ptr = status_db;
@@ -230,7 +230,7 @@ int open(
NULL,
DB_BTREE,
DB_THREAD,
- 0);
+ S_IWUSR);
}
if (error == 0) {
uint32_t pagesize = 0;