diff options
Diffstat (limited to 'storage/tokudb/PerconaFT')
-rw-r--r-- | storage/tokudb/PerconaFT/ft/ft-ops.cc | 23 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/ft-ops.h | 5 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/logger/recover.cc | 3 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/node.cc | 18 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/node.h | 54 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc | 3 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/ft/txn/roll.cc | 3 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/util/dmt.h | 5 | ||||
-rw-r--r-- | storage/tokudb/PerconaFT/util/omt.h | 2 |
9 files changed, 66 insertions, 50 deletions
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 30a8710d7aa..ad9ecb1d074 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -651,10 +651,8 @@ void toku_ftnode_clone_callback(void *value_data, // set new pair attr if necessary if (node->height == 0) { *new_attr = make_ftnode_pair_attr(node); - for (int i = 0; i < node->n_children; i++) { - BLB(node, i)->logical_rows_delta = 0; - BLB(cloned_node, i)->logical_rows_delta = 0; - } + node->logical_rows_delta = 0; + cloned_node->logical_rows_delta = 0; } else { new_attr->is_valid = false; } @@ -702,6 +700,10 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), if (ftnode->height == 0) { FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1); FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size); + if (!ftnode->dirty) { + toku_ft_adjust_logical_row_count( + ft, -ftnode->logical_rows_delta); + } } else { FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1); FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size); @@ -714,11 +716,12 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), BASEMENTNODE bn = BLB(ftnode, i); toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); - if (!ftnode->dirty) - toku_ft_adjust_logical_row_count( - ft, -bn->logical_rows_delta); } } + if (!ftnode->dirty) { + toku_ft_adjust_logical_row_count( + ft, -ftnode->logical_rows_delta); + } } } toku_ftnode_free(&ftnode); @@ -944,8 +947,6 @@ int toku_ftnode_pe_callback(void *ftnode_pv, basements_to_destroy[num_basements_to_destroy++] = bn; toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); - toku_ft_adjust_logical_row_count(ft, - -bn->logical_rows_delta); set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; @@ -2652,7 +2653,7 @@ static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir( return result; } -static bool toku_create_subdirs_if_needed(const char *path) { +bool 
toku_create_subdirs_if_needed(const char *path) { static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH; @@ -4563,6 +4564,8 @@ int toku_ft_rename_iname(DB_TXN *txn, bs_new_name); } + if (!toku_create_subdirs_if_needed(new_iname_full.get())) + return get_error_errno(); r = toku_os_rename(old_iname_full.get(), new_iname_full.get()); if (r != 0) return r; diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h index 70cf045d43c..df8ffe287df 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.h +++ b/storage/tokudb/PerconaFT/ft/ft-ops.h @@ -288,3 +288,8 @@ void toku_ft_set_direct_io(bool direct_io_on); void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers); void toku_note_deserialized_basement_node(bool fixed_key_size); + +// Creates all directories for the path if necessary, +// returns true if all dirs are created successfully or +// all dirs exist, false otherwise. +bool toku_create_subdirs_if_needed(const char* path); diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc index a9c30c0e37a..9eaa56bdc53 100644 --- a/storage/tokudb/PerconaFT/ft/logger/recover.cc +++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc @@ -987,7 +987,8 @@ static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { return 1; if (old_exist && !new_exist && - (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + (!toku_create_subdirs_if_needed(new_iname_full.get()) || + toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || toku_fsync_directory(old_iname_full.get()) == -1 || toku_fsync_directory(new_iname_full.get()) == -1)) return 1; diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc index 12e5fda226e..07309ff7f94 100644 --- a/storage/tokudb/PerconaFT/ft/node.cc +++ b/storage/tokudb/PerconaFT/ft/node.cc @@ -386,7 +386,8 @@ static void 
bnc_apply_messages_to_basement_node( const pivot_bounds & bounds, // contains pivot key bounds of this basement node txn_gc_info *gc_info, - bool *msgs_applied) { + bool *msgs_applied, + int64_t* logical_rows_delta) { int r; NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); @@ -394,7 +395,6 @@ static void bnc_apply_messages_to_basement_node( // apply messages from this buffer STAT64INFO_S stats_delta = {0, 0}; uint64_t workdone_this_ancestor = 0; - int64_t logical_rows_delta = 0; uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { @@ -470,7 +470,7 @@ static void bnc_apply_messages_to_basement_node( gc_info, &workdone_this_ancestor, &stats_delta, - &logical_rows_delta); + logical_rows_delta); } } else if (stale_lbi == stale_ube) { // No stale messages to apply, we just apply fresh messages, and mark @@ -482,7 +482,7 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta}; + .logical_rows_delta = logical_rows_delta}; if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; r = bnc->fresh_message_tree @@ -503,7 +503,7 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta}; + .logical_rows_delta = logical_rows_delta}; r = bnc->stale_message_tree .iterate_on_range<struct iterate_do_bn_apply_msg_extra, @@ -521,8 +521,6 @@ static void bnc_apply_messages_to_basement_node( if (stats_delta.numbytes || stats_delta.numrows) { toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); } - toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta); - bn->logical_rows_delta += logical_rows_delta; } static void @@ -536,6 +534,7 @@ apply_ancestors_messages_to_bn( bool* msgs_applied ) { + int64_t logical_rows_delta = 0; BASEMENTNODE curr_bn = BLB(node, childnum); const pivot_bounds curr_bounds = 
bounds.next_bounds(node, childnum); for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { @@ -548,13 +547,16 @@ apply_ancestors_messages_to_bn( curr_ancestors->childnum, curr_bounds, gc_info, - msgs_applied + msgs_applied, + &logical_rows_delta ); // We don't want to check this ancestor node again if the // next time we query it, the msn hasn't changed. curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; } } + toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta); + node->logical_rows_delta += logical_rows_delta; // At this point, we know all the stale messages above this // basement node have been applied, and any new messages will be // fresh, so we don't need to look at stale messages for this diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h index 52eefec0936..db189e36d59 100644 --- a/storage/tokudb/PerconaFT/ft/node.h +++ b/storage/tokudb/PerconaFT/ft/node.h @@ -157,36 +157,49 @@ private: // TODO: class me up struct ftnode { - MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk + // max_msn_applied that will be written to disk + MSN max_msn_applied_to_node_on_disk; unsigned int flags; - BLOCKNUM blocknum; // Which block number is this node? - int layout_version; // What version of the data structure? - int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) - int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) - uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk - int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ - int dirty; + // Which block number is this node? + BLOCKNUM blocknum; + // What version of the data structure? 
+ int layout_version; + // different (<) from layout_version if upgraded from a previous version + // (useful for debugging) + int layout_version_original; + // transient, not serialized to disk, (useful for debugging) + int layout_version_read_from_disk; + // build_id (svn rev number) of software that wrote this node to disk + uint32_t build_id; + // height is always >= 0. 0 for leaf, >0 for nonleaf. + int height; + int dirty; uint32_t fullhash; + // current count of rows added or removed as a result of message application + // to this node as a basement, irrelevant for internal nodes, gets reset + // when node is undirtied. Used to back out tree scoped LRC if node is + // evicted but not persisted + int64_t logical_rows_delta; - // for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. - // for leaf nodes, represents number of basement nodes + // for internal nodes, if n_children==fanout+1 then the tree needs to be + // rebalanced. for leaf nodes, represents number of basement nodes int n_children; ftnode_pivot_keys pivotkeys; - // What's the oldest referenced xid that this node knows about? The real oldest - // referenced xid might be younger, but this is our best estimate. We use it - // as a heuristic to transition provisional mvcc entries from provisional to - // committed (from implicity committed to really committed). + // What's the oldest referenced xid that this node knows about? The real + // oldest referenced xid might be younger, but this is our best estimate. + // We use it as a heuristic to transition provisional mvcc entries from + // provisional to committed (from implicitly committed to really committed). // - // A better heuristic would be the oldest live txnid, but we use this since it - // still works well most of the time, and its readily available on the inject - // code path. 
+ // A better heuristic would be the oldest live txnid, but we use this since + // it still works well most of the time, and its readily available on the + // inject code path. TXNID oldest_referenced_xid_known; // array of size n_children, consisting of ftnode partitions - // each one is associated with a child - // for internal nodes, the ith partition corresponds to the ith message buffer - // for leaf nodes, the ith partition corresponds to the ith basement node + // each one is associated with a child for internal nodes, the ith + // partition corresponds to the ith message buffer for leaf nodes, the ith + // partition corresponds to the ith basement node struct ftnode_partition *bp; struct ctpair *ct_pair; }; @@ -199,7 +212,6 @@ struct ftnode_leaf_basement_node { MSN max_msn_applied; // max message sequence number applied bool stale_ancestor_messages_applied; STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk - int64_t logical_rows_delta; }; typedef struct ftnode_leaf_basement_node *BASEMENTNODE; diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index 5914f8a1050..56876b474d4 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -996,7 +996,6 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { bn->seqinsert = orig_bn->seqinsert; bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; bn->stat64_delta = orig_bn->stat64_delta; - bn->logical_rows_delta = orig_bn->logical_rows_delta; bn->data_buffer.clone(&orig_bn->data_buffer); return bn; } @@ -1007,7 +1006,6 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) { bn->seqinsert = 0; bn->stale_ancestor_messages_applied = false; bn->stat64_delta = ZEROSTATS; - bn->logical_rows_delta = 0; bn->data_buffer.init_zero(); return bn; } @@ -1432,6 +1430,7 @@ static FTNODE 
alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) node->fullhash = fullhash; node->blocknum = blocknum; node->dirty = 0; + node->logical_rows_delta = 0; node->bp = nullptr; node->oldest_referenced_xid_known = TXNID_NONE; return node; diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 9f3977743a0..4f374d62173 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -227,7 +227,8 @@ int toku_rollback_frename(BYTESTRING old_iname, return 1; if (!old_exist && new_exist && - (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + (!toku_create_subdirs_if_needed(old_iname_full.get()) || + toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || toku_fsync_directory(new_iname_full.get()) == -1 || toku_fsync_directory(old_iname_full.get()) == -1)) return 1; diff --git a/storage/tokudb/PerconaFT/util/dmt.h b/storage/tokudb/PerconaFT/util/dmt.h index 71cde8814ab..99be296d0e9 100644 --- a/storage/tokudb/PerconaFT/util/dmt.h +++ b/storage/tokudb/PerconaFT/util/dmt.h @@ -589,7 +589,6 @@ private: void convert_from_tree_to_array(void); - __attribute__((nonnull(2,5))) void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree); template<typename iterate_extra_t, @@ -627,16 +626,12 @@ private: __attribute__((nonnull)) void rebalance(subtree *const subtree); - __attribute__((nonnull(3))) static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n); - __attribute__((nonnull(3))) static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n); - __attribute__((nonnull(4))) static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr); - __attribute__((nonnull(4))) static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t 
*const stored_value_ptr); template<typename dmtcmp_t, diff --git a/storage/tokudb/PerconaFT/util/omt.h b/storage/tokudb/PerconaFT/util/omt.h index 799ed0eae7c..c7ed2ca546f 100644 --- a/storage/tokudb/PerconaFT/util/omt.h +++ b/storage/tokudb/PerconaFT/util/omt.h @@ -284,7 +284,6 @@ public: * By taking ownership of the array, we save a malloc and memcpy, * and possibly a free (if the caller is done with the array). */ - __attribute__((nonnull)) void create_steal_sorted_array(omtdata_t **const values, const uint32_t numvalues, const uint32_t new_capacity); /** @@ -667,7 +666,6 @@ private: void set_at_internal(const subtree &subtree, const omtdata_t &value, const uint32_t idx); - __attribute__((nonnull(2,5))) void delete_internal(subtree *const subtreep, const uint32_t idx, omt_node *const copyn, subtree **const rebalance_subtree); template<typename iterate_extra_t, |