diff options
author | Clarisse Cheah <clarisse.cheah@mongodb.com> | 2023-02-16 23:48:38 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-17 03:16:21 +0000 |
commit | ae27437baac78bc6ee1d6f6df8a324eeac256358 (patch) | |
tree | 23475cf8c49d1f16fafc2173a297e20e942e2f77 /src/third_party/wiredtiger/ext | |
parent | d62829f2ca02687b3ab8888aeebe7dac4c5322ee (diff) | |
download | mongo-ae27437baac78bc6ee1d6f6df8a324eeac256358.tar.gz |
Import wiredtiger: ab0cc1bfbf5ae12477f22d152f61b0e6c73dafc0 from branch mongodb-master
ref: 971995c617..ab0cc1bfbf
for: 7.0.0-rc0
WT-10367 Implement file open, read, close, exists, size and lock for GCP file handle
Diffstat (limited to 'src/third_party/wiredtiger/ext')
-rw-r--r-- | src/third_party/wiredtiger/ext/storage_sources/gcp_store/gcp_storage_source.cpp | 160 |
1 files changed, 148 insertions, 12 deletions
diff --git a/src/third_party/wiredtiger/ext/storage_sources/gcp_store/gcp_storage_source.cpp b/src/third_party/wiredtiger/ext/storage_sources/gcp_store/gcp_storage_source.cpp index 845b607e264..55861e1c5b9 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/gcp_store/gcp_storage_source.cpp +++ b/src/third_party/wiredtiger/ext/storage_sources/gcp_store/gcp_storage_source.cpp @@ -20,7 +20,6 @@ struct gcp_store { WT_EXTENSION_API *wt_api; std::mutex fs_list_mutex; std::vector<gcp_file_system *> fs_list; - std::vector<gcp_file_system> gcp_fs; uint32_t reference_count; WT_VERBOSE_LEVEL verbose; }; @@ -30,14 +29,16 @@ struct gcp_file_system { gcp_store *storage_source; WT_FILE_SYSTEM *wt_file_system; std::unique_ptr<gcp_connection> gcp_conn; + std::mutex fh_list_mutex; std::vector<gcp_file_handle *> fh_list; std::string home_dir; }; struct gcp_file_handle { WT_FILE_HANDLE fh; - gcp_store *storage_source; - WT_FILE_HANDLE *wt_file_handle; + gcp_file_system *file_system; + std::string name; + uint32_t reference_count; }; static gcp_file_system *get_gcp_file_system(WT_FILE_SYSTEM *); @@ -58,8 +59,11 @@ static int gcp_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t) __attribute__((__unused__)); static int gcp_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t) __attribute__((__unused__)); -static int gcp_file_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *) +static int gcp_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool) __attribute__((__unused__)); +static int gcp_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *) __attribute__((__unused__)); +static int gcp_object_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *) __attribute__((__unused__)); +static int gcp_file_read(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *); static int gcp_object_list( WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); static int gcp_object_list_add(const gcp_store &, char ***, const std::vector<std::string> &, @@ -153,7 +157,7 @@ gcp_customize_file_system(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session fs->file_system.fs_open_file = gcp_file_open; fs->file_system.fs_remove = gcp_remove; fs->file_system.fs_rename = gcp_rename; - fs->file_system.fs_size = gcp_file_size; + fs->file_system.fs_size = gcp_object_size; // Add to the list of the active file systems. Lock will be freed when the scope is exited. { @@ -192,9 +196,25 @@ gcp_add_reference(WT_STORAGE_SOURCE *storage_source) static int gcp_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session) { - WT_UNUSED(file_handle); WT_UNUSED(session); + gcp_file_handle *gcp_fh = reinterpret_cast<gcp_file_handle *>(file_handle); + + // If there are other active instances of the file being open, do not close file handle. + if (--gcp_fh->reference_count != 0) + return 0; + + // No more active instances of open file, close the file handle. + gcp_file_system *fs = gcp_fh->file_system; + { + std::lock_guard<std::mutex> lock(fs->fh_list_mutex); + // Erase remove idiom is used here to remove specific file system. + fs->fh_list.erase( + std::remove(fs->fh_list.begin(), fs->fh_list.end(), gcp_fh), fs->fh_list.end()); + } + + delete (gcp_fh); + return 0; } @@ -281,12 +301,84 @@ static int gcp_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handle_ptr) { - WT_UNUSED(file_system); WT_UNUSED(session); - WT_UNUSED(name); - WT_UNUSED(file_type); - WT_UNUSED(flags); - WT_UNUSED(file_handle_ptr); + gcp_file_system *fs = reinterpret_cast<gcp_file_system *>(file_system); + *file_handle_ptr = nullptr; + + // Google cloud only supports opening the file in read only mode. + if ((flags & WT_FS_OPEN_READONLY) == 0 || (flags & WT_FS_OPEN_CREATE) != 0) { + std::cerr << "gcp_file_open: read-only access required." << std::endl; + return EINVAL; + } + + // Only data files and regular files should be opened. + if (file_type != WT_FS_OPEN_FILE_TYPE_DATA && file_type != WT_FS_OPEN_FILE_TYPE_REGULAR) { + std::cerr << "gcp_file_open: only data file and regular types supported." << std::endl; + return EINVAL; + } + + // Check if object exists in the cloud. + bool exists; + size_t size; + int ret; + if (ret = (fs->gcp_conn->object_exists(name, exists, size) != 0)) { + std::cerr << "gcp_file_open: object_exists request to google cloud failed." << std::endl; + return ret; + } + if (!exists) { + std::cerr << "gcp_file_open: object named " << name << " does not exist in the bucket." + << std::endl; + return EINVAL; + } + + // Check if there is already an existing file handle open. + auto fh_iterator = std::find_if(fs->fh_list.begin(), fs->fh_list.end(), + [name](gcp_file_handle *fh) { return (fh->name.compare(name) == 0); }); + + if (fh_iterator != fs->fh_list.end()) { + (*fh_iterator)->reference_count++; + *file_handle_ptr = reinterpret_cast<WT_FILE_HANDLE *>(*fh_iterator); + + return 0; + } + + // If an active file handle does not exist, create a new file handle for the current file. + gcp_file_handle *gcp_fh; + try { + gcp_fh = new gcp_file_handle; + } catch (std::bad_alloc &e) { + std::cerr << "gcp_file_open: " << e.what() << std::endl; + return ENOMEM; + } + gcp_fh->file_system = fs; + gcp_fh->name = name; + gcp_fh->reference_count = 1; + + // Define functions needed for google cloud with read-only privilleges. + gcp_fh->fh.close = gcp_file_close; + gcp_fh->fh.fh_advise = nullptr; + gcp_fh->fh.fh_extend = nullptr; + gcp_fh->fh.fh_extend_nolock = nullptr; + gcp_fh->fh.fh_lock = gcp_file_lock; + gcp_fh->fh.fh_map = nullptr; + gcp_fh->fh.fh_map_discard = nullptr; + gcp_fh->fh.fh_map_preload = nullptr; + gcp_fh->fh.fh_unmap = nullptr; + gcp_fh->fh.fh_read = gcp_file_read; + gcp_fh->fh.fh_size = gcp_file_size; + gcp_fh->fh.fh_sync = nullptr; + gcp_fh->fh.fh_sync_nowait = nullptr; + gcp_fh->fh.fh_truncate = nullptr; + gcp_fh->fh.fh_write = nullptr; + + // Exclusive access is required when adding file handles to list of file handles. The lock_guard + // will unlock automatically when the scope is exited. + { + std::lock_guard<std::mutex> lock(fs->fh_list_mutex); + fs->fh_list.push_back(gcp_fh); + } + + *file_handle_ptr = &gcp_fh->fh; return 0; } @@ -316,7 +408,36 @@ gcp_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, c } static int -gcp_file_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep) +gcp_file_lock([[maybe_unused]] WT_FILE_HANDLE *file_handle, [[maybe_unused]] WT_SESSION *session, + [[maybe_unused]] bool lock) +{ + // Locks are always granted. + return 0; +} + +static int +gcp_file_size([[maybe_unused]] WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep) +{ + gcp_file_handle *gcp_fh = reinterpret_cast<gcp_file_handle *>(file_handle); + gcp_file_system *fs = gcp_fh->file_system; + bool exists; + size_t size; + int ret; + *sizep = 0; + + // Get file size if the object exists. + if ((fs->gcp_conn->object_exists(gcp_fh->name, exists, size))) { + std::cerr << "gcp_file_size: object_exists request to google cloud failed." << std::endl; + return ret; + } + + *sizep = size; + + return 0; +} + +static int +gcp_object_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep) { WT_UNUSED(file_system); WT_UNUSED(session); @@ -327,6 +448,21 @@ gcp_file_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name } static int +gcp_file_read([[maybe_unused]] WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, + size_t len, void *buf) +{ + gcp_file_handle *gcp_fh = reinterpret_cast<gcp_file_handle *>(file_handle); + gcp_file_system *fs = gcp_fh->file_system; + + int ret; + + if ((ret = fs->gcp_conn->read_object(gcp_fh->name, offset, len, buf)) != 0) + std::cerr << "gcp_file_read: read attempt failed." << std::endl; + + return ret; +} + +static int gcp_object_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, const char *prefix, char ***object_list, uint32_t *count) { |