summaryrefslogtreecommitdiff
path: root/storage/tokudb/PerconaFT
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/PerconaFT')
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.cc23
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.h5
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/recover.cc3
-rw-r--r--storage/tokudb/PerconaFT/ft/node.cc18
-rw-r--r--storage/tokudb/PerconaFT/ft/node.h54
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc3
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/roll.cc3
-rw-r--r--storage/tokudb/PerconaFT/util/dmt.h5
-rw-r--r--storage/tokudb/PerconaFT/util/omt.h2
9 files changed, 66 insertions, 50 deletions
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index 30a8710d7aa..ad9ecb1d074 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -651,10 +651,8 @@ void toku_ftnode_clone_callback(void *value_data,
// set new pair attr if necessary
if (node->height == 0) {
*new_attr = make_ftnode_pair_attr(node);
- for (int i = 0; i < node->n_children; i++) {
- BLB(node, i)->logical_rows_delta = 0;
- BLB(cloned_node, i)->logical_rows_delta = 0;
- }
+ node->logical_rows_delta = 0;
+ cloned_node->logical_rows_delta = 0;
} else {
new_attr->is_valid = false;
}
@@ -702,6 +700,10 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
if (ftnode->height == 0) {
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
+ if (!ftnode->dirty) {
+ toku_ft_adjust_logical_row_count(
+ ft, -ftnode->logical_rows_delta);
+ }
} else {
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
@@ -714,11 +716,12 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
BASEMENTNODE bn = BLB(ftnode, i);
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
- if (!ftnode->dirty)
- toku_ft_adjust_logical_row_count(
- ft, -bn->logical_rows_delta);
}
}
+ if (!ftnode->dirty) {
+ toku_ft_adjust_logical_row_count(
+ ft, -ftnode->logical_rows_delta);
+ }
}
}
toku_ftnode_free(&ftnode);
@@ -944,8 +947,6 @@ int toku_ftnode_pe_callback(void *ftnode_pv,
basements_to_destroy[num_basements_to_destroy++] = bn;
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
- toku_ft_adjust_logical_row_count(ft,
- -bn->logical_rows_delta);
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
@@ -2652,7 +2653,7 @@ static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir(
return result;
}
-static bool toku_create_subdirs_if_needed(const char *path) {
+bool toku_create_subdirs_if_needed(const char *path) {
static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH;
@@ -4563,6 +4564,8 @@ int toku_ft_rename_iname(DB_TXN *txn,
bs_new_name);
}
+ if (!toku_create_subdirs_if_needed(new_iname_full.get()))
+ return get_error_errno();
r = toku_os_rename(old_iname_full.get(), new_iname_full.get());
if (r != 0)
return r;
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h
index 70cf045d43c..df8ffe287df 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.h
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.h
@@ -288,3 +288,8 @@ void toku_ft_set_direct_io(bool direct_io_on);
void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
void toku_note_deserialized_basement_node(bool fixed_key_size);
+
+// Creates all directories for the path if necessary,
+// returns true if all dirs are created successfully or
+// all dirs exist, false otherwise.
+bool toku_create_subdirs_if_needed(const char* path);
diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc
index a9c30c0e37a..9eaa56bdc53 100644
--- a/storage/tokudb/PerconaFT/ft/logger/recover.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc
@@ -987,7 +987,8 @@ static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) {
return 1;
if (old_exist && !new_exist &&
- (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+ (!toku_create_subdirs_if_needed(new_iname_full.get()) ||
+ toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
toku_fsync_directory(old_iname_full.get()) == -1 ||
toku_fsync_directory(new_iname_full.get()) == -1))
return 1;
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 12e5fda226e..07309ff7f94 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -386,7 +386,8 @@ static void bnc_apply_messages_to_basement_node(
const pivot_bounds &
bounds, // contains pivot key bounds of this basement node
txn_gc_info *gc_info,
- bool *msgs_applied) {
+ bool *msgs_applied,
+ int64_t* logical_rows_delta) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
@@ -394,7 +395,6 @@ static void bnc_apply_messages_to_basement_node(
// apply messages from this buffer
STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
- int64_t logical_rows_delta = 0;
uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
@@ -470,7 +470,7 @@ static void bnc_apply_messages_to_basement_node(
gc_info,
&workdone_this_ancestor,
&stats_delta,
- &logical_rows_delta);
+ logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark
@@ -482,7 +482,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta};
+ .logical_rows_delta = logical_rows_delta};
if (fresh_ube - fresh_lbi > 0)
*msgs_applied = true;
r = bnc->fresh_message_tree
@@ -503,7 +503,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
- .logical_rows_delta = &logical_rows_delta};
+ .logical_rows_delta = logical_rows_delta};
r = bnc->stale_message_tree
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
@@ -521,8 +521,6 @@ static void bnc_apply_messages_to_basement_node(
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
- toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
- bn->logical_rows_delta += logical_rows_delta;
}
static void
@@ -536,6 +534,7 @@ apply_ancestors_messages_to_bn(
bool* msgs_applied
)
{
+ int64_t logical_rows_delta = 0;
BASEMENTNODE curr_bn = BLB(node, childnum);
const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum);
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
@@ -548,13 +547,16 @@ apply_ancestors_messages_to_bn(
curr_ancestors->childnum,
curr_bounds,
gc_info,
- msgs_applied
+ msgs_applied,
+ &logical_rows_delta
);
// We don't want to check this ancestor node again if the
// next time we query it, the msn hasn't changed.
curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk;
}
}
+ toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
+ node->logical_rows_delta += logical_rows_delta;
// At this point, we know all the stale messages above this
// basement node have been applied, and any new messages will be
// fresh, so we don't need to look at stale messages for this
diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h
index 52eefec0936..db189e36d59 100644
--- a/storage/tokudb/PerconaFT/ft/node.h
+++ b/storage/tokudb/PerconaFT/ft/node.h
@@ -157,36 +157,49 @@ private:
// TODO: class me up
struct ftnode {
- MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk
+ // max_msn_applied that will be written to disk
+ MSN max_msn_applied_to_node_on_disk;
unsigned int flags;
- BLOCKNUM blocknum; // Which block number is this node?
- int layout_version; // What version of the data structure?
- int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging)
- int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging)
- uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk
- int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
- int dirty;
+ // Which block number is this node?
+ BLOCKNUM blocknum;
+ // What version of the data structure?
+ int layout_version;
+ // different (<) from layout_version if upgraded from a previous version
+ // (useful for debugging)
+ int layout_version_original;
+ // transient, not serialized to disk, (useful for debugging)
+ int layout_version_read_from_disk;
+ // build_id (svn rev number) of software that wrote this node to disk
+ uint32_t build_id;
+ // height is always >= 0. 0 for leaf, >0 for nonleaf.
+ int height;
+ int dirty;
uint32_t fullhash;
+ // current count of rows added or removed as a result of message application
+ // to this node as a basement, irrelevant for internal nodes, gets reset
+ // when node is undirtied. Used to back out tree scoped LRC if node is
+ // evicted but not persisted
+ int64_t logical_rows_delta;
- // for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
- // for leaf nodes, represents number of basement nodes
+ // for internal nodes, if n_children==fanout+1 then the tree needs to be
+ // rebalanced. for leaf nodes, represents number of basement nodes
int n_children;
ftnode_pivot_keys pivotkeys;
- // What's the oldest referenced xid that this node knows about? The real oldest
- // referenced xid might be younger, but this is our best estimate. We use it
- // as a heuristic to transition provisional mvcc entries from provisional to
- // committed (from implicity committed to really committed).
+ // What's the oldest referenced xid that this node knows about? The real
+ // oldest referenced xid might be younger, but this is our best estimate.
+ // We use it as a heuristic to transition provisional mvcc entries from
+ // provisional to committed (from implicitly committed to really committed).
//
- // A better heuristic would be the oldest live txnid, but we use this since it
- // still works well most of the time, and its readily available on the inject
- // code path.
+ // A better heuristic would be the oldest live txnid, but we use this since
+ // it still works well most of the time, and it's readily available on the
+ // inject code path.
TXNID oldest_referenced_xid_known;
// array of size n_children, consisting of ftnode partitions
- // each one is associated with a child
- // for internal nodes, the ith partition corresponds to the ith message buffer
- // for leaf nodes, the ith partition corresponds to the ith basement node
+ // each one is associated with a child. for internal nodes, the ith
+ // partition corresponds to the ith message buffer; for leaf nodes, the ith
+ // partition corresponds to the ith basement node
struct ftnode_partition *bp;
struct ctpair *ct_pair;
};
@@ -199,7 +212,6 @@ struct ftnode_leaf_basement_node {
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
- int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index 5914f8a1050..56876b474d4 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -996,7 +996,6 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
bn->seqinsert = orig_bn->seqinsert;
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
bn->stat64_delta = orig_bn->stat64_delta;
- bn->logical_rows_delta = orig_bn->logical_rows_delta;
bn->data_buffer.clone(&orig_bn->data_buffer);
return bn;
}
@@ -1007,7 +1006,6 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
bn->seqinsert = 0;
bn->stale_ancestor_messages_applied = false;
bn->stat64_delta = ZEROSTATS;
- bn->logical_rows_delta = 0;
bn->data_buffer.init_zero();
return bn;
}
@@ -1432,6 +1430,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum)
node->fullhash = fullhash;
node->blocknum = blocknum;
node->dirty = 0;
+ node->logical_rows_delta = 0;
node->bp = nullptr;
node->oldest_referenced_xid_known = TXNID_NONE;
return node;
diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc
index 9f3977743a0..4f374d62173 100644
--- a/storage/tokudb/PerconaFT/ft/txn/roll.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc
@@ -227,7 +227,8 @@ int toku_rollback_frename(BYTESTRING old_iname,
return 1;
if (!old_exist && new_exist &&
- (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+ (!toku_create_subdirs_if_needed(old_iname_full.get()) ||
+ toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
toku_fsync_directory(new_iname_full.get()) == -1 ||
toku_fsync_directory(old_iname_full.get()) == -1))
return 1;
diff --git a/storage/tokudb/PerconaFT/util/dmt.h b/storage/tokudb/PerconaFT/util/dmt.h
index 71cde8814ab..99be296d0e9 100644
--- a/storage/tokudb/PerconaFT/util/dmt.h
+++ b/storage/tokudb/PerconaFT/util/dmt.h
@@ -589,7 +589,6 @@ private:
void convert_from_tree_to_array(void);
- __attribute__((nonnull(2,5)))
void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree);
template<typename iterate_extra_t,
@@ -627,16 +626,12 @@ private:
__attribute__((nonnull))
void rebalance(subtree *const subtree);
- __attribute__((nonnull(3)))
static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n);
- __attribute__((nonnull(3)))
static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n);
- __attribute__((nonnull(4)))
static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr);
- __attribute__((nonnull(4)))
static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr);
template<typename dmtcmp_t,
diff --git a/storage/tokudb/PerconaFT/util/omt.h b/storage/tokudb/PerconaFT/util/omt.h
index 799ed0eae7c..c7ed2ca546f 100644
--- a/storage/tokudb/PerconaFT/util/omt.h
+++ b/storage/tokudb/PerconaFT/util/omt.h
@@ -284,7 +284,6 @@ public:
* By taking ownership of the array, we save a malloc and memcpy,
* and possibly a free (if the caller is done with the array).
*/
- __attribute__((nonnull))
void create_steal_sorted_array(omtdata_t **const values, const uint32_t numvalues, const uint32_t new_capacity);
/**
@@ -667,7 +666,6 @@ private:
void set_at_internal(const subtree &subtree, const omtdata_t &value, const uint32_t idx);
- __attribute__((nonnull(2,5)))
void delete_internal(subtree *const subtreep, const uint32_t idx, omt_node *const copyn, subtree **const rebalance_subtree);
template<typename iterate_extra_t,