summaryrefslogtreecommitdiff
path: root/storage/tokudb/PerconaFT
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/PerconaFT')
-rw-r--r--storage/tokudb/PerconaFT/buildheader/make_tdb.cc3
-rw-r--r--storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h2
-rw-r--r--storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc16
-rw-r--r--storage/tokudb/PerconaFT/ft/cachetable/cachetable.h6
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.cc151
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-ops.h2
-rw-r--r--storage/tokudb/PerconaFT/ft/ft.cc14
-rw-r--r--storage/tokudb/PerconaFT/ft/ft.h6
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/logformat.cc9
-rw-r--r--storage/tokudb/PerconaFT/ft/logger/recover.cc78
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h16
-rw-r--r--storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc7
-rw-r--r--storage/tokudb/PerconaFT/ft/txn/roll.cc118
-rw-r--r--storage/tokudb/PerconaFT/portability/file.cc6
-rw-r--r--storage/tokudb/PerconaFT/portability/memory.cc9
-rw-r--r--storage/tokudb/PerconaFT/portability/memory.h4
-rw-r--r--storage/tokudb/PerconaFT/portability/toku_portability.h2
-rw-r--r--storage/tokudb/PerconaFT/src/tests/CMakeLists.txt42
-rw-r--r--storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc155
-rw-r--r--storage/tokudb/PerconaFT/src/ydb-internal.h3
-rw-r--r--storage/tokudb/PerconaFT/src/ydb.cc50
-rw-r--r--storage/tokudb/PerconaFT/src/ydb_db.cc99
-rw-r--r--storage/tokudb/PerconaFT/src/ydb_db.h16
23 files changed, 672 insertions, 142 deletions
diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
index 576f902f6ae..7ede78b3c0d 100644
--- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
+++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
@@ -422,6 +422,9 @@ static void print_db_env_struct (void) {
"int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)",
"void (*set_check_thp)(DB_ENV *, bool new_val)",
"bool (*get_check_thp)(DB_ENV *)",
+ "bool (*set_dir_per_db)(DB_ENV *, bool new_val)",
+ "bool (*get_dir_per_db)(DB_ENV *)",
+ "const char *(*get_data_dir)(DB_ENV *env)",
NULL};
sort_and_dump_fields("db_env", true, extra);
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
index dc6aec9226d..05fb771de08 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
@@ -138,6 +138,8 @@ struct cachefile {
// nor attempt to open any cachefile with the same fname (dname)
// until this cachefile has been fully closed and unlinked.
bool unlink_on_close;
+ // If set then fclose will not be logged in recovery log.
+ bool skip_log_recover_on_close;
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
CACHETABLE cachetable;
struct fileid fileid;
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
index 5bba977de1a..6d753805fa9 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
@@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) {
return cf->fname_in_env;
}
+// Replace the in-environment file name recorded in the cachefile.
+// Only the pointer is stored: the string is not copied and the previously
+// stored name is not freed here, so the caller handles both.
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) {
+    cf->fname_in_env = new_fname_in_env;
+}
+
int
toku_cachefile_get_fd (CACHEFILE cf) {
return cf->fd;
@@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) {
return cf->unlink_on_close;
}
+// Set the cachefile's skip_log_recover_on_close flag, so that the close path
+// does not write an fclose entry to the recovery log.
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) {
+    cf->skip_log_recover_on_close = true;
+}
+
+// Clear the cachefile's skip_log_recover_on_close flag, restoring the default
+// behaviour of logging fclose on close.
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) {
+    cf->skip_log_recover_on_close = false;
+}
+
+// Report the current value of the cachefile's skip_log_recover_on_close flag.
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) {
+    const bool skip_logging = cf->skip_log_recover_on_close;
+    return skip_logging;
+}
+
uint64_t toku_cachefile_size(CACHEFILE cf) {
int64_t file_size;
int fd = toku_cachefile_get_fd(cf);
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
index 148326562ab..3b3cb0a2d46 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
@@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE);
// Return the filename
char * toku_cachefile_fname_in_env (CACHEFILE cf);
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env);
+
// Make it so when the cachefile closes, the underlying file is unlinked
void toku_cachefile_unlink_on_close(CACHEFILE cf);
// is this cachefile marked as unlink on close?
bool toku_cachefile_is_unlink_on_close(CACHEFILE cf);
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf);
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf);
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf);
+
// Return the logger associated with the cachefile
struct tokulogger *toku_cachefile_logger(CACHEFILE cf);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index f131668889e..30a8710d7aa 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -149,22 +149,23 @@ basement nodes, bulk fetch, and partial fetch:
#include "ft/cachetable/checkpoint.h"
#include "ft/cursor.h"
-#include "ft/ft.h"
#include "ft/ft-cachetable-wrappers.h"
#include "ft/ft-flusher.h"
#include "ft/ft-internal.h"
-#include "ft/msg.h"
+#include "ft/ft.h"
#include "ft/leafentry.h"
#include "ft/logger/log-internal.h"
+#include "ft/msg.h"
#include "ft/node.h"
#include "ft/serialize/block_table.h"
-#include "ft/serialize/sub_block.h"
#include "ft/serialize/ft-serialize.h"
#include "ft/serialize/ft_layout_version.h"
#include "ft/serialize/ft_node-serialize.h"
+#include "ft/serialize/sub_block.h"
#include "ft/txn/txn_manager.h"
-#include "ft/ule.h"
#include "ft/txn/xids.h"
+#include "ft/ule.h"
+#include "src/ydb-internal.h"
#include <toku_race_tools.h>
@@ -179,6 +180,7 @@ basement nodes, bulk fetch, and partial fetch:
#include <stdint.h>
+#include <memory>
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
*/
@@ -2593,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode
static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH;
+// Return true when `last_slash` points at the very first character of `path`,
+// i.e. nothing precedes that position that could be returned as a parent
+// directory.
+inline bool toku_file_is_root(const char *path, const char *last_slash) {
+    const bool at_first_char = (path == last_slash);
+    return at_first_char;
+}
+
+// Return a newly allocated copy of the parent directory of `path`, owned by a
+// unique_ptr that releases it with toku_free().
+// The returned pointer is empty when `path` contains no OS_PATH_SEPARATOR or
+// when the only thing above the last component is the start of the path.
+// Runs of consecutive separators are treated as one, and a trailing separator
+// is ignored (e.g. "a/b/c" -> "a/b", "a/b/" -> "a", "/a" -> empty).
+static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir(
+ const char *path) {
+ std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+
+ bool has_trailing_slash = false;
+
+ /* Find the offset of the last slash */
+ const char *last_slash = strrchr(path, OS_PATH_SEPARATOR);
+
+ if (!last_slash) {
+ /* No slash in the path, return an empty pointer */
+ return result;
+ }
+
+ /* Ok, there is a slash. Is there anything after it? */
+ if (static_cast<size_t>(last_slash - path + 1) == strlen(path)) {
+ has_trailing_slash = true;
+ }
+
+ /* Reduce repetitive slashes. */
+ while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+
+ /* Check for the root of a drive. */
+ if (toku_file_is_root(path, last_slash)) {
+ return result;
+ }
+
+ /* If a trailing slash prevented the first strrchr() from trimming
+ the last component of the path, trim that component now. */
+ if (has_trailing_slash) {
+ /* Back up to the previous slash. This cannot move before `path`
+ because the root check above guarantees last_slash > path. */
+ last_slash--;
+ while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+
+ /* Reduce repetitive slashes. */
+ while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+ last_slash--;
+ }
+ }
+
+ /* Check for the root of a drive. */
+ if (toku_file_is_root(path, last_slash)) {
+ return result;
+ }
+
+ /* Copy everything that precedes the separator found above. */
+ result.reset(toku_strndup(path, last_slash - path));
+ return result;
+}
+
+// Make sure every directory above the last component of `path` exists,
+// creating the missing ones parents-first.
+// Returns true on success, including when `path` has no parent directory
+// that needs to exist. Returns false with errno set on failure, so that the
+// caller can report it through get_error_errno().
+static bool toku_create_subdirs_if_needed(const char *path) {
+    static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
+                                   S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH;
+
+    auto subdir = toku_file_get_parent_dir(path);
+    if (!subdir.get())
+        return true;
+
+    toku_struct_stat stat;
+    if (toku_stat(subdir.get(), &stat) != -1) {
+        if (S_ISDIR(stat.st_mode))
+            return true;
+        // The parent exists but is not a directory. stat() succeeded, so
+        // errno carries no meaningful value; set it explicitly because the
+        // caller reads errno after a false return.
+        errno = ENOTDIR;
+        return false;
+    }
+
+    // Anything other than "does not exist" is a hard failure.
+    if (ENOENT != get_error_errno())
+        return false;
+
+    // Create the ancestors first, then the directory itself.
+    if (!toku_create_subdirs_if_needed(subdir.get()))
+        return false;
+
+    if (toku_os_mkdir(subdir.get(), dir_mode) == 0)
+        return true;
+
+    // Another thread or process may have created the directory between the
+    // stat() above and mkdir(); that is not an error as long as what exists
+    // now is a directory.
+    if (EEXIST != get_error_errno())
+        return false;
+    if (toku_stat(subdir.get(), &stat) == -1)
+        return false;
+    if (!S_ISDIR(stat.st_mode)) {
+        errno = ENOTDIR;
+        return false;
+    }
+    return true;
+}
+
// open a file for use by the ft
// Requires: File does not exist.
static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) {
int r;
int fd;
int er;
+ if (!toku_create_subdirs_if_needed(fname))
+ return get_error_errno();
fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode);
assert(fd==-1);
if ((er = get_maybe_error_errno()) != ENOENT) {
@@ -4427,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) {
toku_cachefile_unlink_on_close(cf);
}
+// Rename the file backing an FT from `old_iname` to `new_iname`, both given
+// relative to `data_dir`.
+// When `txn` is non-NULL the rename is recorded first: an open cachefile for
+// `old_iname` (if any) is re-labelled with the new name, a rollback entry is
+// saved and an frename record is written to the recovery log.
+// Returns 0 on success, otherwise the non-zero result of the failing rename
+// or directory fsync.
+int toku_ft_rename_iname(DB_TXN *txn,
+                         const char *data_dir,
+                         const char *old_iname,
+                         const char *new_iname,
+                         CACHETABLE ct) {
+    std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+        toku_construct_full_name(2, data_dir, new_iname), &toku_free);
+    std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+        toku_construct_full_name(2, data_dir, old_iname), &toku_free);
+
+    if (txn) {
+        TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
+        // The logged names include the terminating NUL.
+        BYTESTRING bs_old_name = {static_cast<uint32_t>(strlen(old_iname) + 1),
+                                  const_cast<char *>(old_iname)};
+        BYTESTRING bs_new_name = {static_cast<uint32_t>(strlen(new_iname) + 1),
+                                  const_cast<char *>(new_iname)};
+        FILENUM filenum = FILENUM_NONE;
+        {
+            CACHEFILE cf;
+            // Touch cf only when the lookup reports success; on any failure
+            // cf is not guaranteed to have been assigned.
+            if (toku_cachefile_of_iname_in_env(ct, old_iname, &cf) == 0) {
+                char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+                toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname));
+                toku_free(old_fname_in_cf);
+                filenum = toku_cachefile_filenum(cf);
+            }
+        }
+        toku_logger_save_rollback_frename(ttxn, &bs_old_name, &bs_new_name);
+        toku_log_frename(ttxn->logger,
+                         (LSN *)0,
+                         0,
+                         toku_txn_get_txnid(ttxn),
+                         bs_old_name,
+                         filenum,
+                         bs_new_name);
+    }
+
+    int r = toku_os_rename(old_iname_full.get(), new_iname_full.get());
+    if (r != 0)
+        return r;
+
+    // Persist both directory entries. The old and new names may live in
+    // different directories, and the recovery and rollback paths for frename
+    // fsync both sides as well.
+    r = toku_fsync_directory(new_iname_full.get());
+    if (r != 0)
+        return r;
+    return toku_fsync_directory(old_iname_full.get());
+}
+
int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) {
int fd = toku_cachefile_get_fd(ft_handle->ft->cf);
toku_ft_lock(ft_handle->ft);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h
index 313a74628ea..70cf045d43c 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.h
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.h
@@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/msg.h"
#include "util/dbt.h"
+#define OS_PATH_SEPARATOR '/'
+
typedef struct ft_handle *FT_HANDLE;
int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 699fcc57603..7c94b4c59d3 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
assert(fname_in_env);
BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env};
- toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
+ if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) {
+ toku_log_fclose(
+ logger,
+ &lsn,
+ ft->h->dirty,
+ bs,
+ toku_cachefile_filenum(cachefile)); // flush the log on
+ // close (if new header
+ // is being written),
+ // otherwise it might
+ // not make it out.
+ toku_cachefile_do_log_recover_on_close(cachefile);
+ }
}
}
if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader)
diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h
index d600e093bdc..7a3c4fa783c 100644
--- a/storage/tokudb/PerconaFT/ft/ft.h
+++ b/storage/tokudb/PerconaFT/ft/ft.h
@@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS;
void toku_ft_unlink(FT_HANDLE handle);
void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn);
+int toku_ft_rename_iname(DB_TXN *txn,
+ const char *data_dir,
+ const char *old_iname,
+ const char *new_iname,
+ CACHETABLE ct);
+
void toku_ft_init_reflock(FT ft);
void toku_ft_destroy_reflock(FT ft);
void toku_ft_grab_reflock(FT ft);
diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
index 6f3baa81c86..49b61138803 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
@@ -90,6 +90,10 @@ const struct logtype rollbacks[] = {
{"fcreate", 'F', FA{{"FILENUM", "filenum", 0},
{"BYTESTRING", "iname", 0},
NULLFIELD}, LOG_BEGIN_ACTION_NA},
+ //rename file
+ {"frename", 'n', FA{{"BYTESTRING", "old_iname", 0},
+ {"BYTESTRING", "new_iname", 0},
+ NULLFIELD}, LOG_BEGIN_ACTION_NA},
// cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data.
{"cmdinsert", 'i', FA{
{"FILENUM", "filenum", 0},
@@ -195,6 +199,11 @@ const struct logtype logtypes[] = {
{"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUM", "filenum", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
+ {"frename", 'n', FA{{"TXNID_PAIR", "xid", 0},
+ {"BYTESTRING", "old_iname", 0},
+ {"FILENUM", "old_filenum", 0},
+ {"BYTESTRING", "new_iname", 0},
+ NULLFIELD}, IGNORE_LOG_BEGIN},
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc
index 38f29773bd6..a9c30c0e37a 100644
--- a/storage/tokudb/PerconaFT/ft/logger/recover.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+#include <memory>
#include "ft/cachetable/cachetable.h"
#include "ft/cachetable/checkpoint.h"
#include "ft/ft.h"
@@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER
return 0;
}
+// Replay an frename log record during recovery.
+// Brings the file system to the post-rename state (new name present), updates
+// the recovery file map entry for the renamed file, and re-registers the
+// rollback entry with the owning transaction if that transaction is known.
+// Returns 0 on success and 1 when a stat/unlink/rename/fsync call fails.
+static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) {
+ assert(renv);
+ assert(renv->env);
+
+ toku_struct_stat stat;
+ const char *data_dir = renv->env->get_data_dir(renv->env);
+ bool old_exist = true;
+ bool new_exist = true;
+
+ assert(data_dir);
+
+ struct file_map_tuple *tuple;
+
+ std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+ toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free);
+ std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+ toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free);
+
+ // Probe which of the two names currently exist; only ENOENT is tolerated.
+ if (toku_stat(old_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ old_exist = false;
+ else
+ return 1;
+ }
+
+ if (toku_stat(new_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ new_exist = false;
+ else
+ return 1;
+ }
+
+ // Both the old and the new file can exist if:
+ // - rename() did not complete
+ // - fcreate was replayed during recovery
+ // The 'stalled cachefiles' container cachefile_list::m_stale_fileid holds
+ // closed but not yet evicted cachefiles, keyed by the fs-dependent file id,
+ // a (device id, inode number) pair. Since the new file is not supposed to
+ // have been created yet during recovery, that container can only hold the
+ // cachefile of the old file.
+ // To preserve the old file's id, and so keep its cachefile valid in the
+ // 'stalled cachefiles' container, the new file is removed and the old
+ // file is renamed over it.
+ if (old_exist && new_exist &&
+ (toku_os_unlink(new_iname_full.get()) == -1 ||
+ toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1))
+ return 1;
+
+ // Only the old file exists: the rename has not happened yet, so do it.
+ if (old_exist && !new_exist &&
+ (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1))
+ return 1;
+
+ // If the file is tracked in the recovery file map, record its new name.
+ if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) {
+ if (tuple->iname)
+ toku_free(tuple->iname);
+ tuple->iname = toku_xstrdup(l->new_iname.data);
+ }
+
+ TOKUTXN txn = NULL;
+ toku_txnid2txn(renv->logger, l->xid, &txn);
+
+ // Re-create the rollback entry so an abort of this transaction can undo
+ // the rename.
+ if (txn)
+ toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname);
+
+ return 0;
+}
+
+// Backward-scan handler for frename log records: there is nothing to do, so
+// it always reports success.
+static int toku_recover_backward_frename(struct logtype_frename *UU(l),
+                                         RECOVER_ENV UU(renv)) {
+    const int success = 0;
+    return success;
+}
+
static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) {
int r;
TOKUTXN txn = NULL;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
index 92f1e278e1a..eb8c953b08c 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
@@ -106,6 +106,7 @@ namespace MhsRbTree {
static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
OUUInt64() : _value(0) {}
OUUInt64(uint64_t s) : _value(s) {}
+ OUUInt64(const OUUInt64& o) : _value(o._value) {}
bool operator<(const OUUInt64 &r) const {
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
return _value < r.ToInt();
@@ -182,15 +183,18 @@ namespace MhsRbTree {
class Node {
public:
- struct BlockPair {
+ class BlockPair {
+ public:
OUUInt64 _offset;
OUUInt64 _size;
BlockPair() : _offset(0), _size(0) {}
BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
-
BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
- int operator<(const struct BlockPair &rhs) const {
+ BlockPair(const BlockPair &o)
+ : _offset(o._offset), _size(o._size) {}
+
+ int operator<(const BlockPair &rhs) const {
return _offset < rhs._offset;
}
int operator<(const uint64_t &o) const { return _offset < o; }
@@ -203,15 +207,15 @@ namespace MhsRbTree {
};
EColor _color;
- struct BlockPair _hole;
- struct Pair _label;
+ BlockPair _hole;
+ Pair _label;
Node *_left;
Node *_right;
Node *_parent;
Node(EColor c,
Node::BlockPair h,
- struct Pair lb,
+ Pair lb,
Node *l,
Node *r,
Node *p)
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
index 85f29ce9813..cefe66335a6 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
@@ -53,9 +53,10 @@ static void generate_random_input() {
std::srand(unsigned(std::time(0)));
// set some values:
- for (uint64_t i = 1; i < N; ++i) {
- input_vector.push_back({i, 0});
- old_vector[i] = {i, 0};
+ for (uint64_t i = 0; i < N; ++i) {
+ MhsRbTree::Node::BlockPair bp = {i+1, 0};
+ input_vector.push_back(bp);
+ old_vector[i] = bp;
}
// using built-in random generator:
std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc
index 90eee1e580a..9f3977743a0 100644
--- a/storage/tokudb/PerconaFT/ft/txn/roll.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc
@@ -38,13 +38,13 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
/* rollback and rollforward routines. */
-
-#include "ft/ft.h"
+#include <memory>
#include "ft/ft-ops.h"
+#include "ft/ft.h"
#include "ft/log_header.h"
#include "ft/logger/log-internal.h"
-#include "ft/txn/xids.h"
#include "ft/txn/rollback-apply.h"
+#include "ft/txn/xids.h"
// functionality provided by roll.c is exposed by an autogenerated
// header file, logheader.h
@@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM filenum,
// directory row lock for its dname) and we would not get this
// far if there were other live handles.
toku_cachefile_unlink_on_close(cf);
+ toku_cachefile_skip_log_recover_on_close(cf);
done:
return 0;
}
+// Commit handler for an frename rollback entry. It performs no work and
+// always returns 0.
+int toku_commit_frename(BYTESTRING /* old_name */,
+                        BYTESTRING /* new_iname */,
+                        TOKUTXN /* txn */,
+                        LSN UU(oplsn)) {
+    const int success = 0;
+    return success;
+}
+
+// Roll back an frename: bring the file system back to the pre-rename state
+// (old name present) and, if a cachefile is registered under the new name,
+// re-label it with the old name and suppress fclose logging for it.
+// Returns 0 on success and 1 when a stat/unlink/rename/fsync call fails.
+int toku_rollback_frename(BYTESTRING old_iname,
+ BYTESTRING new_iname,
+ TOKUTXN txn,
+ LSN UU(oplsn)) {
+ assert(txn);
+ assert(txn->logger);
+ assert(txn->logger->ct);
+
+ CACHETABLE cachetable = txn->logger->ct;
+
+ toku_struct_stat stat;
+ bool old_exist = true;
+ bool new_exist = true;
+
+ std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+ toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data),
+ &toku_free);
+ std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+ toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data),
+ &toku_free);
+
+ // Probe which of the two names currently exist; only ENOENT is tolerated.
+ if (toku_stat(old_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ old_exist = false;
+ else
+ return 1;
+ }
+
+ if (toku_stat(new_iname_full.get(), &stat) == -1) {
+ if (ENOENT == errno)
+ new_exist = false;
+ else
+ return 1;
+ }
+
+ // Both the old and the new file can exist if:
+ // - rename() did not complete
+ // - fcreate was replayed during recovery
+ // The 'stalled cachefiles' container cachefile_list::m_stale_fileid holds
+ // closed but not yet evicted cachefiles, keyed by the fs-dependent file id,
+ // a (device id, inode number) pair. To preserve the new file's id, and so
+ // keep its cachefile valid in the 'stalled cachefiles' container, the old
+ // file is removed and the new file is renamed over it.
+ if (old_exist && new_exist &&
+ (toku_os_unlink(old_iname_full.get()) == -1 ||
+ toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1))
+ return 1;
+
+ // Only the new file exists: undo the rename.
+ if (!old_exist && new_exist &&
+ (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+ toku_fsync_directory(new_iname_full.get()) == -1 ||
+ toku_fsync_directory(old_iname_full.get()) == -1))
+ return 1;
+
+ // it's ok if both files do not exist on recovery
+ if (!old_exist && !new_exist)
+ assert(txn->for_recovery);
+
+ CACHEFILE cf;
+ int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf);
+ if (r != ENOENT) {
+ // Swap the name stored in the cachefile back to the old one and
+ // release the string it held before.
+ char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+ toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data));
+ toku_free(old_fname_in_cf);
+ // There is at least one case where fclose logging causes an error:
+ // 1) start transaction
+ // 2) create ft 'a' (write "fcreate" in recovery log)
+ // 3) rename ft 'a' to 'b' (write "frename" in recovery log)
+ // 4) abort transaction:
+ // a) rollback rename ft (renames 'b' to 'a')
+ // b) rollback create ft (removes 'a'):
+ // invokes toku_cachefile_unlink_on_close - lazy unlink on file
+ // close, it just sets the corresponding flag in the cachefile
+ // object
+ // c) write "unlink" for 'a' in recovery log
+ // (when the transaction is aborted all locks are released; when
+ // the file lock is released the file is closed, and unlinked if
+ // the corresponding flag is set in the cachefile object)
+ // 5) crash
+ //
+ // After this we have the following records in the recovery log:
+ // - create ft 'a',
+ // - rename 'a' to 'b',
+ // - unlink 'a'
+ //
+ // On recovery:
+ // - create 'a'
+ // - rename 'a' to 'b'
+ // - unlink 'a' - as file 'a' does not exist we crash on an assert
+ // here
+ //
+ // There is no need to write "unlink" in recovery log in (4a) because
+ // 'a' will be removed on transaction rollback on recovery.
+ // NOTE(review): "(4a)" presumably refers to step (4c) above - confirm.
+ toku_cachefile_skip_log_recover_on_close(cf);
+ }
+
+ return 0;
+}
+
int find_ft_from_filenum (const FT &ft, const FILENUM &filenum);
int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) {
FILENUM thisfnum = toku_cachefile_filenum(ft->cf);
diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc
index 5332a2dff55..0e3efc1a12a 100644
--- a/storage/tokudb/PerconaFT/portability/file.cc
+++ b/storage/tokudb/PerconaFT/portability/file.cc
@@ -356,6 +356,12 @@ toku_os_close(int fd) { // if EINTR, retry until success
return r;
}
+// Portability wrapper: forwards to rename() and returns its result unchanged.
+int toku_os_rename(const char *old_name, const char *new_name) {
+    const int rc = rename(old_name, new_name);
+    return rc;
+}
+
+// Portability wrapper: forwards to unlink() and returns its result unchanged.
+int toku_os_unlink(const char *path) {
+    const int rc = unlink(path);
+    return rc;
+}
+
ssize_t
toku_os_read(int fd, void *buf, size_t count) {
ssize_t r;
diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc
index 2de12699c61..5430ff84b70 100644
--- a/storage/tokudb/PerconaFT/portability/memory.cc
+++ b/storage/tokudb/PerconaFT/portability/memory.cc
@@ -313,6 +313,15 @@ toku_strdup(const char *s) {
return (char *) toku_memdup(s, strlen(s)+1);
}
+// Toku-version of strndup: return a NUL-terminated copy of at most `n` leading
+// bytes of `s`, allocated with toku_malloc() (release with toku_free()).
+// Returns NULL when the allocation fails.
+char *toku_strndup(const char *s, size_t n) {
+    // Look for the terminator within the first n bytes only, so that no more
+    // of the input is scanned than can be copied.
+    const void *terminator = memchr(s, 0, n);
+    const size_t len =
+        terminator ? static_cast<size_t>(
+                         static_cast<const char *>(terminator) - s)
+                   : n;
+    char *result = static_cast<char *>(toku_malloc(len + 1));
+    if (result) {
+        // The allocation may fail; only touch the buffer when it did not.
+        memcpy(result, s, len);
+        result[len] = 0;
+    }
+    return result;
+}
+
void
toku_free(void *p) {
if (p) {
diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h
index 7780536f279..5ae652d39fc 100644
--- a/storage/tokudb/PerconaFT/portability/memory.h
+++ b/storage/tokudb/PerconaFT/portability/memory.h
@@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default")
void *toku_memdup (const void *v, size_t len);
/* Toku-version of strdup. Use this so that it calls toku_malloc() */
char *toku_strdup (const char *s) __attribute__((__visibility__("default")));
-
+/* Toku-version of strndup. Use this so that it calls toku_malloc() */
+char *toku_strndup(const char *s, size_t n)
+ __attribute__((__visibility__("default")));
/* Copy memory. Analogous to strdup() Crashes instead of returning NULL */
void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default")));
/* Toku-version of strdup. Use this so that it calls toku_xmalloc() Crashes instead of returning NULL */
diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h
index 921d3a309f6..f127b0fe172 100644
--- a/storage/tokudb/PerconaFT/portability/toku_portability.h
+++ b/storage/tokudb/PerconaFT/portability/toku_portability.h
@@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode);
int toku_os_open_direct(const char *path, int oflag, int mode);
int toku_os_close(int fd);
int toku_os_fclose(FILE * stream);
+int toku_os_rename(const char *old_name, const char *new_name);
+int toku_os_unlink(const char *path);
ssize_t toku_os_read(int fd, void *buf, size_t count);
ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset);
void toku_os_recursive_delete(const char *path);
diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
index 47f6aa44a75..c01a8f0d628 100644
--- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
@@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
foreach(ov c d r)
if (ov STREQUAL c)
- set(gset 0)
set(hset 0)
+ set(iset 0)
else ()
- set(gset 0 1 2 3 4 5)
- set(hset 0 1)
+ set(hset 0 1 2 3 4 5)
+ set(iset 0 1)
endif ()
foreach(av 0 1)
@@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
foreach(dv ${dset})
foreach(ev ${eset})
foreach(fv 0 1)
- foreach(gv ${gset})
+ foreach(gv 0 1)
foreach(hv ${hset})
-
- if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
- set(iset 0 1)
- else ()
- set(iset 0)
- endif ()
-
foreach(iv ${iset})
- set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
- set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
- set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors")
- add_test(NAME ${testname}
- COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
- -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv}
- )
- setup_toku_test_properties(${testname} ${envdir})
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+
+ if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
+ set(jset 0 1)
+ else ()
+ set(jset 0)
+ endif ()
+
+ foreach(jv ${jset})
+ set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+ set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+ set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors")
+ add_test(NAME ${testname}
+ COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
+ -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv}
+ )
+ setup_toku_test_properties(${testname} ${envdir})
+ set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+ endforeach(jv)
endforeach(iv)
endforeach(hv)
endforeach(gv)
diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
index 2c905c5ff12..cc99ab560d8 100644
--- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
+++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
@@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-#include "test.h"
-#include "toku_pthread.h"
#include <db.h>
-#include <sys/stat.h>
#include <stdlib.h>
-
+#include <sys/stat.h>
+#include "ft/logger/logger.h"
+#include "test.h"
+#include "toku_pthread.h"
static int do_recover;
static int do_crash;
static char fileop;
-static int choices['I'-'A'+1];
+static int choices['J' - 'A' + 1];
const int num_choices = sizeof(choices)/sizeof(choices[0]);
static DB_TXN *txn;
const char *oldname = "oldfoo";
@@ -58,11 +58,14 @@ static char *cmd;
+// Print the command-line synopsis to stderr and terminate with exit status 1.
+// Choice values are parsed as single digits 0-9, and options A through J are
+// recognised, so the help text states exactly that.
static void
usage(void) {
- fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n"
- " fileop = c/r/d (create/rename/delete)\n"
- " Where # is a single digit number > 0.\n"
- " A-F are required for fileop=create\n"
- " A-I are required for fileop=delete, fileop=rename\n", cmd);
+ fprintf(stderr,
+ "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# "
+ "-F# -G# [-H# -I# -J#]\n"
+ " fileop = c/r/d (create/rename/delete)\n"
+ " Where # is a single digit number >= 0.\n"
+ " A-G are required for fileop=create\n"
+ " A-J are required for fileop=delete, fileop=rename\n",
+ cmd);
exit(1);
}
@@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) {
return get_bool_choice('F');
}
-static int
-get_choice_create_type(void) {
- return get_x_choice('G', 6);
-}
+// Value of boolean choice 'G' (the dir_per_db setting for this test run).
+static int get_choice_dir_per_db(void) {
+    return get_bool_choice('G');
+}
+
+// Value of choice 'H' (the create type), obtained via get_x_choice('H', 6).
+static int get_choice_create_type(void) {
+    return get_x_choice('H', 6);
+}
static int
get_choice_txn_does_open_close_before_fileop(void) {
- return get_bool_choice('H');
+ return get_bool_choice('I');
}
static int
get_choice_lock_table_split_fcreate(void) {
- int choice = get_bool_choice('I');
+ int choice = get_bool_choice('J');
if (choice)
assert(fileop_did_commit());
return choice;
@@ -156,63 +158,65 @@ do_args(int argc, char * const argv[]) {
choices[i] = -1;
}
- int c;
- while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) {
- switch(c) {
- case 'v':
- verbose++;
- break;
- case 'q':
- verbose--;
- if (verbose<0) verbose=0;
- break;
- case 'h':
- case '?':
- usage();
- break;
- case 'c':
- do_crash = 1;
- break;
- case 'r':
- do_recover = 1;
- break;
- case 'O':
- if (fileop != '\0')
+    // getopt() returns int and reports the end of the options with -1.
+    // Keeping the result in a plain char breaks that comparison on targets
+    // where char is unsigned, and the loop would never terminate.
+    int c;
+    while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) {
+ switch (c) {
+ case 'v':
+ verbose++;
+ break;
+ case 'q':
+ verbose--;
+ if (verbose < 0)
+ verbose = 0;
+ break;
+ case 'h':
+ case '?':
usage();
- fileop = optarg[0];
- switch (fileop) {
- case 'c':
- case 'r':
- case 'd':
- break;
- default:
+ break;
+ case 'c':
+ do_crash = 1;
+ break;
+ case 'r':
+ do_recover = 1;
+ break;
+ case 'O':
+ if (fileop != '\0')
usage();
- break;
- }
- break;
- case 'A':
- case 'B':
- case 'C':
- case 'D':
- case 'E':
- case 'F':
- case 'G':
- case 'H':
- case 'I':
- if (fileop == '\0')
- usage();
- int num;
- num = atoi(optarg);
- if (num < 0 || num > 9)
- usage();
- choices[c - 'A'] = num;
- break;
- case 'X':
- if (strcmp(optarg, "novalgrind") == 0) {
- // provide a way for the shell script runner to pass an
- // arg that suppresses valgrind on this child process
+ fileop = optarg[0];
+ switch (fileop) {
+ case 'c':
+ case 'r':
+ case 'd':
+ break;
+ default:
+ usage();
+ break;
+ }
+ break;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ if (fileop == '\0')
+ usage();
+ int num;
+ num = atoi(optarg);
+ if (num < 0 || num > 9)
+ usage();
+ choices[c - 'A'] = num;
break;
- }
+ case 'X':
+ if (strcmp(optarg, "novalgrind") == 0) {
+ // provide a way for the shell script runner to pass an
+ // arg that suppresses valgrind on this child process
+ break;
+ }
// otherwise, fall through to an error
default:
usage();
@@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) {
if (argc!=optind) { usage(); exit(1); }
for (i = 0; i < num_choices; i++) {
- if (i >= 'G' - 'A' && fileop == 'c')
+ if (i >= 'H' - 'A' && fileop == 'c')
break;
if (choices[i] == -1)
usage();
@@ -261,6 +265,8 @@ static void env_startup(void) {
int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag;
r = db_env_create(&env, 0);
CKERR(r);
+ r = env->set_dir_per_db(env, get_choice_dir_per_db());
+ CKERR(r);
env->set_errfile(env, stderr);
r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
@@ -625,8 +631,11 @@ recover_and_verify(void) {
else if (did_create_commit_early())
expect_old_name = 1;
}
- verify_file_exists(oldname, expect_old_name);
- verify_file_exists(newname, expect_new_name);
+ // File existence can only be verified if the log was flushed before the crash
+ if ((get_choice_flush_log_before_crash())) {
+ verify_file_exists(oldname, expect_old_name);
+ verify_file_exists(newname, expect_new_name);
+ }
env_shutdown();
}
diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h
index 2d6c84126e1..d40f7795b0b 100644
--- a/storage/tokudb/PerconaFT/src/ydb-internal.h
+++ b/storage/tokudb/PerconaFT/src/ydb-internal.h
@@ -132,7 +132,8 @@ struct __toku_db_env_internal {
int datadir_lockfd;
int logdir_lockfd;
int tmpdir_lockfd;
- bool check_thp; // if set check if transparent huge pages are disables
+ bool check_thp; // if set check if transparent huge pages are disabled
+ bool dir_per_db;
uint64_t (*get_loader_memory_size_callback)(void);
uint64_t default_lock_timeout_msec;
uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc
index aed271bce40..3341f6d76c6 100644
--- a/storage/tokudb/PerconaFT/src/ydb.cc
+++ b/storage/tokudb/PerconaFT/src/ydb.cc
@@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) {
return env->i->check_thp;
}
+// Stores new_val in the environment's dir_per_db flag and returns the value
+// the flag had before the call (the previous setting, not an error code).
+// NOTE(review): env_startup() in recovery_fileops_unit.cc passes this return
+// value to CKERR(), which presumably expects 0 - confirm that is intended.
+// NOTE(review): HANDLE_PANICKED_ENV is defined outside this view; if it
+// returns an int error code, that code is converted to bool here - confirm.
+static bool env_set_dir_per_db(DB_ENV *env, bool new_val) {
+ HANDLE_PANICKED_ENV(env);
+ bool r = env->i->dir_per_db;
+ env->i->dir_per_db = new_val;
+ return r;
+}
+
+// Returns the environment's current dir_per_db flag.
+// NOTE(review): HANDLE_PANICKED_ENV is defined outside this view; if it
+// returns an int error code, that code is converted to bool here - confirm.
+static bool env_get_dir_per_db(DB_ENV *env) {
+ HANDLE_PANICKED_ENV(env);
+ return env->i->dir_per_db;
+}
+
+// Returns the environment's real_data_dir pointer as stored (no copy is made).
+// Unlike the dir_per_db accessors, this does not check for a panicked env.
+static const char *env_get_data_dir(DB_ENV *env) {
+ return env->i->real_data_dir;
+}
+
static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags);
static int
@@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) {
USENV(do_backtrace);
USENV(set_check_thp);
USENV(get_check_thp);
+ USENV(set_dir_per_db);
+ USENV(get_dir_per_db);
+ USENV(get_data_dir);
#undef USENV
// unlocked methods
@@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
if (env_is_db_with_dname_open(env, newname)) {
return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n");
}
-
+
DBT old_dname_dbt;
DBT new_dname_dbt;
DBT iname_dbt;
@@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
r = EEXIST;
}
else if (r == DB_NOTFOUND) {
+ DBT new_iname_dbt;
+ // Do not rename ft file if 'dir_per_db' option is not set
+ auto new_iname =
+ env->get_dir_per_db(env)
+ ? generate_iname_for_rename_or_open(
+ env, txn, newname, false)
+ : std::unique_ptr<char[], decltype(&toku_free)>(
+ toku_strdup(iname), &toku_free);
+ toku_fill_dbt(
+ &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1);
+
// remove old (dname,iname) and insert (newname,iname) in directory
r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true);
if (r != 0) { goto exit; }
- r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true);
+
+            // Move the underlying ft file only in 'dir_per_db' mode; otherwise
+            // new_iname is a copy of iname and the file stays where it is.
+            if (env->get_dir_per_db(env)) {
+                r = toku_ft_rename_iname(txn,
+                                         env->get_data_dir(env),
+                                         iname,
+                                         new_iname.get(),
+                                         env->i->cachetable);
+                // Stop here on failure: the put below would overwrite r and
+                // record new_iname in the directory although the file was
+                // never renamed.
+                if (r != 0) { goto exit; }
+            }
+
+            r = toku_db_put(env->i->directory,
+                            txn,
+                            &new_dname_dbt,
+                            &new_iname_dbt,
+                            0,
+                            true);
if (r != 0) { goto exit; }
//Now that we have writelocks on both dnames, verify that there are still no handles open. (to prevent race conditions)
@@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
// otherwise, we're okay in marking this ft as remove on
// commit. no new handles can open for this dictionary
// because the txn has directory write locks on the dname
- if (txn && !can_acquire_table_lock(env, txn, iname)) {
+ if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) {
r = DB_LOCK_NOTGRANTED;
}
// We don't do anything at the ft or cachetable layer for rename.
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc
index e5bd4e7d089..100d1bfa20b 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_db.cc
@@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) {
*statp = ydb_db_layer_status;
}
-static void
-create_iname_hint(const char *dname, char *hint) {
+void create_iname_hint(const char *dname, char *hint) {
//Requires: size of hint array must be > strlen(dname)
//Copy alphanumeric characters only.
//Replace strings of non-alphanumeric characters with a single underscore.
@@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) {
*hint = '\0';
}
+// Builds an iname hint from dname for 'dir_per_db' mode.
+// Requires: size of hint array must be > strlen(dname)
+// One leading '.' and then one leading '/' are skipped if present.
+// Alphanumeric characters are copied. The first remaining '/' is copied too,
+// because it is the delimiter which splits the name into database dir and
+// table dir. Every other run of characters (later '/' included) is replaced
+// with a single underscore.
+void create_iname_hint_for_dbdir(const char *dname, char *hint) {
+    assert(dname);
+    if (*dname == '.')
+        ++dname;
+    if (*dname == '/')
+        ++dname;
+    bool underscored = false;
+    bool dbdir_is_parsed = false;
+    while (*dname) {
+        // <cctype> classification is undefined for negative char values, so
+        // look at the byte as unsigned char.
+        const bool is_alnum = isalnum(static_cast<unsigned char>(*dname)) != 0;
+        if (is_alnum || (*dname == '/' && !dbdir_is_parsed)) {
+            char c = *dname++;
+            *hint++ = c;
+            if (c == '/')
+                dbdir_is_parsed = true;
+            underscored = false;
+        } else {
+            if (!underscored)
+                *hint++ = '_';
+            dname++;
+            underscored = true;
+        }
+    }
+    *hint = '\0';
+}
+
// n < 0 means to ignore mark and ignore n
// n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname
// (intended for use by loader, which will create many inames using one txnid).
-static char *
-create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) {
+char *create_iname(DB_ENV *env,
+ uint64_t id1,
+ uint64_t id2,
+ char *hint,
+ const char *mark,
+ int n) {
int bytes;
char inamebase[strlen(hint) +
8 + // hex file format version
@@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma
return rval;
}
+static uint64_t nontransactional_open_id = 0;
+
+// Generates a new iname for dname; shared by db open and db rename.
+// The ids given to create_iname() are the parent/child txn ids when txn is
+// non-NULL. Without a txn they stay 0, except that an open (is_open == true)
+// takes id1 from toku_sync_fetch_and_add() on nontransactional_open_id.
+// The hint is built by create_iname_hint_for_dbdir() when dir_per_db is set
+// and dname is not an absolute name, otherwise by create_iname_hint().
+// Returns the create_iname() result wrapped in a unique_ptr whose deleter is
+// toku_free.
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+ DB_ENV *env,
+ DB_TXN *txn,
+ const char *dname,
+ bool is_open) {
+ std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+ // Both hint builders require a buffer larger than strlen(dname).
+ char hint[strlen(dname) + 1];
+ uint64_t id1 = 0;
+ uint64_t id2 = 0;
+
+ if (txn) {
+ id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
+ id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
+ } else if (is_open)
+ id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
+
+ if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+ create_iname_hint_for_dbdir(dname, hint);
+ else
+ create_iname_hint(dname, hint);
+
+ // No mark and n == -1: the plain (non-loader) iname form.
+ result.reset(create_iname(env, id1, id2, hint, NULL, -1));
+
+ return result;
+}
+
static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode);
// Effect: Do the work required of DB->close().
@@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY
return r;
}
-static uint64_t nontransactional_open_id = 0;
-
// inames are created here.
// algorithm:
// begin txn
@@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
- char *iname = (char *) iname_dbt.data;
+ std::unique_ptr<char[], decltype(&toku_free)> iname(
+ static_cast<char *>(iname_dbt.data), &toku_free);
if (r == DB_NOTFOUND && !is_db_create) {
r = ENOENT;
} else if (r==0 && is_db_excl) {
r = EEXIST;
} else if (r == DB_NOTFOUND) {
- char hint[strlen(dname) + 1];
-
- // create iname and make entry in directory
- uint64_t id1 = 0;
- uint64_t id2 = 0;
-
- if (txn) {
- id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
- id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
- } else {
- id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
- }
- create_iname_hint(dname, hint);
- iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname
- toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1);
+ iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true);
+ toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1);
//
// put_flags will be 0 for performance only, avoid unnecessary query
// if we are creating a hot index, per #3166, we do not want the write lock in directory grabbed.
@@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
// we now have an iname
if (r == 0) {
- r = toku_db_open_iname(db, txn, iname, flags, mode);
+ r = toku_db_open_iname(db, txn, iname.get(), flags, mode);
if (r == 0) {
db->i->dname = toku_xstrdup(dname);
env_note_db_opened(db->dbenv, db); // tell env that a new db handle is open (using dname)
}
}
- if (iname) {
- toku_free(iname);
- }
return r;
}
@@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
// now create new iname
char hint[strlen(dname) + 1];
- create_iname_hint(dname, hint);
+ if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+ create_iname_hint_for_dbdir(dname, hint);
+ else
+ create_iname_hint(dname, hint);
const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env
new_inames_in_env[i] = new_iname;
toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h
index 8b92dd1c3cb..8be28857c14 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.h
+++ b/storage/tokudb/PerconaFT/src/ydb_db.h
@@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ydb-internal.h"
#include "ydb_txn.h"
+#include <memory>
+
typedef enum {
YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */
YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */
@@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) {
}
return r;
}
+
+// Like create_iname_hint(), but for 'dir_per_db' mode: the first '/' in dname
+// is kept as the delimiter between database dir and table dir.
+// Requires: size of hint array must be > strlen(dname)
+void create_iname_hint_for_dbdir(const char *dname, char *hint);
+// Copies alphanumeric characters of dname into hint and replaces strings of
+// non-alphanumeric characters with a single underscore.
+// Requires: size of hint array must be > strlen(dname)
+void create_iname_hint(const char *dname, char *hint);
+// Creates an iname from the two ids and the hint.
+// n < 0 means to ignore mark and ignore n;
+// n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname.
+// The returned string is allocated; callers release it with toku_free.
+char *create_iname(DB_ENV *env,
+ uint64_t id1,
+ uint64_t id2,
+ char *hint,
+ const char *mark,
+ int n);
+// Generates the iname for dname on open (is_open == true) or rename; the
+// result is owned by the returned unique_ptr and released with toku_free.
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+ DB_ENV *env,
+ DB_TXN *txn,
+ const char *dname,
+ bool is_open);