diff options
author | Luke Chen <luke.chen@mongodb.com> | 2023-02-13 09:36:04 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-12 23:24:00 +0000 |
commit | 2658f14a7b50e23e35ab42c0bedcbcc12123ab01 (patch) | |
tree | 1f343916aad4261cece355bf40021f8e0fcc4cee /src | |
parent | 4f522e492846363527e79c88cda23afbd2547bc9 (diff) | |
download | mongo-2658f14a7b50e23e35ab42c0bedcbcc12123ab01.tar.gz |
Import wiredtiger: d0ff1fd21fe69a00bb5a793c4a51e9747ae1e51d from branch mongodb-master
ref: 6e87e63889..d0ff1fd21f
for: 7.0.0-rc0
WT-10362 Implement object_list(), file_exists(), remove(), and rename() in file system class for Azure
Diffstat (limited to 'src')
3 files changed, 205 insertions, 61 deletions
diff --git a/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp b/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp index f7583b1c923..2f9d597959f 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp +++ b/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp @@ -30,6 +30,7 @@ #include <algorithm> #include <fstream> #include <filesystem> +#include <fstream> #include <iostream> #include <memory> #include <mutex> @@ -78,20 +79,17 @@ static int azure_flush_finish( WT_STORAGE_SOURCE *, WT_SESSION *, WT_FILE_SYSTEM *, const char *, const char *, const char *); // WT_FILE_SYSTEM Interface -static int azure_object_list(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, - uint32_t *) __attribute__((__unused__)); -static int azure_object_list_single(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, - char ***, uint32_t *) __attribute__((__unused__)); -static int azure_object_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t) - __attribute__((__unused__)); +static int azure_object_list( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); +static int azure_object_list_single( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); +static int azure_object_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); +static int azure_object_list_add(char ***, const std::vector<std::string> &, const uint32_t); static int azure_file_system_terminate(WT_FILE_SYSTEM *, WT_SESSION *); -static int azure_file_exists(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); -static int azure_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t) - __attribute__((__unused__)); -static int azure_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t) - __attribute__((__unused__)); -static int azure_object_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *) - __attribute__((__unused__)); +static int azure_file_system_exists(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); +static int azure_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t); +static int azure_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t); +static int azure_object_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); static int azure_file_open( WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); @@ -157,7 +155,7 @@ azure_customize_file_system(WT_STORAGE_SOURCE *storage_source, WT_SESSION *sessi azure_fs->fs.fs_directory_list_single = azure_object_list_single; azure_fs->fs.fs_directory_list_free = azure_object_list_free; azure_fs->fs.terminate = azure_file_system_terminate; - azure_fs->fs.fs_exist = azure_file_exists; + azure_fs->fs.fs_exist = azure_file_system_exists; azure_fs->fs.fs_open_file = azure_file_open; azure_fs->fs.fs_remove = azure_remove; azure_fs->fs.fs_rename = azure_rename; @@ -234,30 +232,17 @@ static int azure_flush_finish(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, WT_FILE_SYSTEM *file_system, const char *source, const char *object, const char *config) { - azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system); + size_t size = 0; WT_UNUSED(storage_source); - WT_UNUSED(session); WT_UNUSED(config); WT_UNUSED(source); + WT_UNUSED(size); + bool existp = false; std::cout << "azure_flush_finish: Checking object: " << object << " exists in Azure." << std::endl; - - // Check whether the object exists in the cloud. - bool exists_cloud = false; - size_t size = 0; - azure_fs->azure_conn->object_exists(object, exists_cloud, size); - if (!exists_cloud) { - std::cerr << "azure_flush_finish: Object: " << object << " does not exist in Azure." - << std::endl; - return ENOENT; - } - std::cout << "azure_flush_finish: Object: " << object << " exists in Azure." << std::endl; - - WT_UNUSED(size); - - return 0; + return azure_file_system_exists(file_system, session, object, &existp); } // Discard any resources on termination. @@ -284,41 +269,102 @@ azure_terminate(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session) return 0; } +// Helper to return a list of object names for the given location. +static int +azure_object_list_helper(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, + const char *search_prefix, char ***dirlistp, uint32_t *countp, bool list_single) +{ + azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system); + std::vector<std::string> objects; + std::string complete_prefix; + + *countp = 0; + + if (directory != nullptr) { + complete_prefix.append(directory); + // Add a terminating '/' if one doesn't exist. + if (complete_prefix.length() > 1 && complete_prefix.back() != '/') + complete_prefix.push_back('/'); + } + if (search_prefix != nullptr) + complete_prefix.append(search_prefix); + + int ret; + + ret = list_single ? azure_fs->azure_conn->list_objects(complete_prefix, objects, true) : + azure_fs->azure_conn->list_objects(complete_prefix, objects, false); + + if (ret != 0) { + std::cerr << "azure_object_list: list_objects request to Azure failed." << std::endl; + return ret; + } + *countp = objects.size(); + + std::cerr << "azure_object_list: list_objects request to Azure succeeded. Received " << *countp + << " objects." << std::endl; + azure_object_list_add(dirlistp, objects, *countp); + + return ret; +} + +// Return a list of object names for the given location. static int azure_object_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) { - WT_UNUSED(file_system); - WT_UNUSED(session); - WT_UNUSED(directory); - WT_UNUSED(prefix); - WT_UNUSED(dirlistp); - WT_UNUSED(countp); - - return 0; + return azure_object_list_helper( + file_system, session, directory, prefix, dirlistp, countp, false); } +// Return a single object name for the given location. static int azure_object_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) { - WT_UNUSED(file_system); - WT_UNUSED(session); - WT_UNUSED(directory); - WT_UNUSED(prefix); - WT_UNUSED(dirlistp); - WT_UNUSED(countp); - - return 0; + return azure_object_list_helper( + file_system, session, directory, prefix, dirlistp, countp, true); } +// Free memory allocated by azure_object_list. static int azure_object_list_free( WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count) { WT_UNUSED(file_system); WT_UNUSED(session); - WT_UNUSED(dirlist); + + if (dirlist != nullptr) { + while (count > 0) + free(dirlist[--count]); + free(dirlist); + } + + return 0; +} + +// Add objects retrieved from Azure bucket into the object list, and allocate the memory needed. +static int +azure_object_list_add( + char ***dirlistp, const std::vector<std::string> &objects, const uint32_t count) +{ + + char **entries; + if ((entries = reinterpret_cast<char **>(malloc(sizeof(char *) * count))) == nullptr) { + std::cerr << "azure_object_list_add: Unable to allocate memory for object list." + << std::endl; + return ENOMEM; + } + + // Populate entries with the object string. + for (int i = 0; i < count; i++) { + if ((entries[i] = strdup(objects[i].c_str())) == nullptr) { + std::cerr << "azure_object_list_add: Unable to allocate memory for object string." + << std::endl; + return ENOMEM; + } + } + + *dirlistp = entries; return 0; } @@ -346,25 +392,37 @@ azure_file_system_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session) return 0; } -// Check if the object (file) exists in the Azure storage source. +// Check if the object exists in the Azure storage source. static int -azure_file_exists(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp) +azure_file_system_exists( + WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp) { azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system); - int ret; size_t size = 0; WT_UNUSED(session); WT_UNUSED(size); + WT_DECL_RET; - if ((ret = azure_fs->azure_conn->object_exists(std::string(name), *existp, size)) != 0) { - std::cerr << "azure_file_open: object_exists request to Azure failed." << std::endl; - return ret; - } + std::cout << "azure_file_system_exists: Checking object: " << name << " exists in Azure." + << std::endl; + // Check whether the object exists in the cloud. + WT_ERR(azure_fs->azure_conn->object_exists(name, *existp, size)); + if (!*existp) { + std::cout << "azure_file_system_exists: Object: " << name << " does not exist in Azure." + << std::endl; + } else + std::cout << "azure_file_system_exists: Object: " << name << " exists in Azure." + << std::endl; return 0; + +err: + std::cerr << "azure_file_system_exists: Error with searching for object: " << name << std::endl; + return ret; } +// POSIX remove, not supported for cloud objects. static int azure_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, uint32_t flags) { @@ -373,9 +431,11 @@ azure_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, WT_UNUSED(name); WT_UNUSED(flags); - return 0; + std::cerr << "azure_remove: Object: " << name << ": remove of file not supported." << std::endl; + return ENOTSUP; } +// POSIX rename, not supported for cloud objects. static int azure_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, const char *to, uint32_t flags) @@ -386,9 +446,11 @@ azure_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, WT_UNUSED(to); WT_UNUSED(flags); - return 0; + std::cerr << "azure_rename: Object: " << from << ": rename of file not supported." << std::endl; + return ENOTSUP; } +// Get the size of a file in bytes, by file name. static int azure_object_size( WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep) diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 7a0aa78bdce..a75057721eb 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "6e87e63889b2245ce883d5a55323a1c76c3a2d81" + "commit": "d0ff1fd21fe69a00bb5a793c4a51e9747ae1e51d" } diff --git a/src/third_party/wiredtiger/test/suite/test_tiered19.py b/src/third_party/wiredtiger/test/suite/test_tiered19.py index 75f6451b6d5..0f72987f554 100644 --- a/src/third_party/wiredtiger/test/suite/test_tiered19.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered19.py @@ -170,8 +170,12 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin): ss.ss_customize_file_system( session, self.bucket, None, self.get_fs_config(prefix_2)) - # Check fs exist for non-existing object. - self.assertFalse(azure_fs.fs_exist(session, 'foobar')) + # The object doesn't exist yet. + try: + exists = azure_fs.fs_exist(session, 'foobar') + except: + self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1) + self.assertFalse(exists) # We cannot use the file system to create files, it is readonly. # So use python I/O to build up the file. @@ -179,10 +183,25 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin): outbytes = ('MORE THAN ENOUGH DATA\n'*100000).encode() f.write(outbytes) + # The object still doesn't exist yet. + try: + exists = azure_fs.fs_exist(session, 'foobar') + except: + self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1) + self.assertFalse(exists) + # Flush valid file into Azure. self.assertEqual(ss.ss_flush(session, azure_fs, 'foobar', 'foobar', None), 0) # Check that file exists in Azure. self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'foobar', 'foobar', None), 0) + + # The object exists now. + self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar']) + try: + exists = azure_fs.fs_exist(session, 'foobar') + except: + self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1) + self.assertTrue(exists) # Check file system exists for an existing object. self.assertTrue(azure_fs.fs_exist(session, 'foobar')) @@ -230,8 +249,71 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin): self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, lambda: ss.ss_flush(session, azure_fs, 'non_existing_file', 'non_existing_file', None), err_msg) # Check that file does not exist in Azure. + self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'non_existing_file', 'non_existing_file', None), 0) + + # Test that the no new objects exist after failed flush. + self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar']) + + err_msg = '/Exception: Operation not supported/' + + # Test that POSIX Remove and Rename are not supported. + self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, + lambda: azure_fs.fs_remove(session, 'foobar', 0), err_msg) + self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar']) + self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda: ss.ss_flush_finish(session, azure_fs, 'non_existing_file', 'non_existing_file', None), err_msg) + lambda: azure_fs.fs_rename(session, 'foobar', 'foobar2', 0), err_msg) + self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar']) + + # Flush second valid file into Azure. + self.assertEqual(ss.ss_flush(session, azure_fs, 'foobar', 'foobar2', None), 0) + # Check that second file exists in Azure. + self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'foobar', 'foobar2', None), 0) + + # Directory list should show 2 objects in Azure. + self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar', prefix_1 + 'foobar2']) + + # Directory list single should show 1 object. + self.assertEquals(azure_fs.fs_directory_list_single(session, '', prefix_1), [prefix_1 + 'foobar']) + + # Open existing file in the cloud. Only one active file handle exists for each open file. + # A reference count keeps track of open file instances so we can get a pointer to the same + # file handle as long as there are more open file calls than close file calls (i.e. reference + # count is greater than 0). + fh_1 = azure_fs.fs_open_file(session, 'foobar', file_system.open_file_type_data, file_system.open_readonly) + assert(fh_1 != None) + fh_2 = azure_fs.fs_open_file(session, 'foobar', file_system.open_file_type_data, file_system.open_readonly) + assert(fh_2 != None) + + # File handle lock call not used in Azure implementation. + self.assertEqual(fh_1.fh_lock(session, True), 0) + self.assertEqual(fh_1.fh_lock(session, False), 0) + + # Read using a valid file handle. + inbytes_1 = bytes(1000000) + self.assertEqual(fh_1.fh_read(session, 0, inbytes_1), 0) + self.assertEquals(outbytes[0:1000000], inbytes_1) + + # Close a valid file handle. + self.assertEqual(fh_1.close(session), 0) + + # Read using a valid file handle. + inbytes_2 = bytes(1000000) + self.assertEqual(fh_2.fh_read(session, 0, inbytes_2), 0) + self.assertEquals(outbytes[0:1000000], inbytes_2) + + # File size succeeds. + self.assertEqual(fh_1.fh_size(session), 2200000) + + # Close a valid file handle. + self.assertEquals(fh_2.close(session), 0) + + # Test that opening invalid file fails. + bad_file = 'bad_file' + err_msg = '/Exception: Invalid argument/' + self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, + lambda: azure_fs.fs_open_file(session, bad_file, + file_system.open_file_type_data,file_system.open_readonly), err_msg) # Test that azure file system terminate succeeds. self.assertEqual(azure_fs.terminate(session), 0) |