summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2023-02-13 09:36:04 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-02-12 23:24:00 +0000
commit2658f14a7b50e23e35ab42c0bedcbcc12123ab01 (patch)
tree1f343916aad4261cece355bf40021f8e0fcc4cee /src
parent4f522e492846363527e79c88cda23afbd2547bc9 (diff)
downloadmongo-2658f14a7b50e23e35ab42c0bedcbcc12123ab01.tar.gz
Import wiredtiger: d0ff1fd21fe69a00bb5a793c4a51e9747ae1e51d from branch mongodb-master
ref: 6e87e63889..d0ff1fd21f for: 7.0.0-rc0 WT-10362 Implement object_list(), file_exists(), remove(), and rename() in file system class for Azure
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp176
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_tiered19.py88
3 files changed, 205 insertions, 61 deletions
diff --git a/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp b/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp
index f7583b1c923..2f9d597959f 100644
--- a/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp
+++ b/src/third_party/wiredtiger/ext/storage_sources/azure_store/azure_storage_source.cpp
@@ -30,6 +30,7 @@
#include <algorithm>
#include <fstream>
#include <filesystem>
+#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
@@ -78,20 +79,17 @@ static int azure_flush_finish(
WT_STORAGE_SOURCE *, WT_SESSION *, WT_FILE_SYSTEM *, const char *, const char *, const char *);
// WT_FILE_SYSTEM Interface
-static int azure_object_list(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***,
- uint32_t *) __attribute__((__unused__));
-static int azure_object_list_single(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *,
- char ***, uint32_t *) __attribute__((__unused__));
-static int azure_object_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t)
- __attribute__((__unused__));
+static int azure_object_list(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int azure_object_list_single(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int azure_object_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int azure_object_list_add(char ***, const std::vector<std::string> &, const uint32_t);
static int azure_file_system_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
-static int azure_file_exists(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
-static int azure_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t)
- __attribute__((__unused__));
-static int azure_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t)
- __attribute__((__unused__));
-static int azure_object_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *)
- __attribute__((__unused__));
+static int azure_file_system_exists(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
+static int azure_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
+static int azure_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
+static int azure_object_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
static int azure_file_open(
WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
@@ -157,7 +155,7 @@ azure_customize_file_system(WT_STORAGE_SOURCE *storage_source, WT_SESSION *sessi
azure_fs->fs.fs_directory_list_single = azure_object_list_single;
azure_fs->fs.fs_directory_list_free = azure_object_list_free;
azure_fs->fs.terminate = azure_file_system_terminate;
- azure_fs->fs.fs_exist = azure_file_exists;
+ azure_fs->fs.fs_exist = azure_file_system_exists;
azure_fs->fs.fs_open_file = azure_file_open;
azure_fs->fs.fs_remove = azure_remove;
azure_fs->fs.fs_rename = azure_rename;
@@ -234,30 +232,17 @@ static int
azure_flush_finish(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
WT_FILE_SYSTEM *file_system, const char *source, const char *object, const char *config)
{
- azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system);
+ size_t size = 0;
WT_UNUSED(storage_source);
- WT_UNUSED(session);
WT_UNUSED(config);
WT_UNUSED(source);
+ WT_UNUSED(size);
+ bool existp = false;
std::cout << "azure_flush_finish: Checking object: " << object << " exists in Azure."
<< std::endl;
-
- // Check whether the object exists in the cloud.
- bool exists_cloud = false;
- size_t size = 0;
- azure_fs->azure_conn->object_exists(object, exists_cloud, size);
- if (!exists_cloud) {
- std::cerr << "azure_flush_finish: Object: " << object << " does not exist in Azure."
- << std::endl;
- return ENOENT;
- }
- std::cout << "azure_flush_finish: Object: " << object << " exists in Azure." << std::endl;
-
- WT_UNUSED(size);
-
- return 0;
+ return azure_file_system_exists(file_system, session, object, &existp);
}
// Discard any resources on termination.
@@ -284,41 +269,102 @@ azure_terminate(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session)
return 0;
}
+// Helper to return a list of object names for the given location.
+static int
+azure_object_list_helper(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
+ const char *search_prefix, char ***dirlistp, uint32_t *countp, bool list_single)
+{
+ azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system);
+ std::vector<std::string> objects;
+ std::string complete_prefix;
+
+ *countp = 0;
+
+ if (directory != nullptr) {
+ complete_prefix.append(directory);
+ // Add a terminating '/' if one doesn't exist.
+ if (complete_prefix.length() > 1 && complete_prefix.back() != '/')
+ complete_prefix.push_back('/');
+ }
+ if (search_prefix != nullptr)
+ complete_prefix.append(search_prefix);
+
+ int ret;
+
+ ret = list_single ? azure_fs->azure_conn->list_objects(complete_prefix, objects, true) :
+ azure_fs->azure_conn->list_objects(complete_prefix, objects, false);
+
+ if (ret != 0) {
+ std::cerr << "azure_object_list: list_objects request to Azure failed." << std::endl;
+ return ret;
+ }
+ *countp = objects.size();
+
+ std::cerr << "azure_object_list: list_objects request to Azure succeeded. Received " << *countp
+ << " objects." << std::endl;
+ azure_object_list_add(dirlistp, objects, *countp);
+
+ return ret;
+}
+
+// Return a list of object names for the given location.
static int
azure_object_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
const char *prefix, char ***dirlistp, uint32_t *countp)
{
- WT_UNUSED(file_system);
- WT_UNUSED(session);
- WT_UNUSED(directory);
- WT_UNUSED(prefix);
- WT_UNUSED(dirlistp);
- WT_UNUSED(countp);
-
- return 0;
+ return azure_object_list_helper(
+ file_system, session, directory, prefix, dirlistp, countp, false);
}
+// Return a single object name for the given location.
static int
azure_object_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
const char *prefix, char ***dirlistp, uint32_t *countp)
{
- WT_UNUSED(file_system);
- WT_UNUSED(session);
- WT_UNUSED(directory);
- WT_UNUSED(prefix);
- WT_UNUSED(dirlistp);
- WT_UNUSED(countp);
-
- return 0;
+ return azure_object_list_helper(
+ file_system, session, directory, prefix, dirlistp, countp, true);
}
+// Free memory allocated by azure_object_list.
static int
azure_object_list_free(
WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count)
{
WT_UNUSED(file_system);
WT_UNUSED(session);
- WT_UNUSED(dirlist);
+
+ if (dirlist != nullptr) {
+ while (count > 0)
+ free(dirlist[--count]);
+ free(dirlist);
+ }
+
+ return 0;
+}
+
+// Add objects retrieved from Azure bucket into the object list, and allocate the memory needed.
+static int
+azure_object_list_add(
+ char ***dirlistp, const std::vector<std::string> &objects, const uint32_t count)
+{
+
+ char **entries;
+ if ((entries = reinterpret_cast<char **>(malloc(sizeof(char *) * count))) == nullptr) {
+ std::cerr << "azure_object_list_add: Unable to allocate memory for object list."
+ << std::endl;
+ return ENOMEM;
+ }
+
+ // Populate entries with the object string.
+ for (int i = 0; i < count; i++) {
+ if ((entries[i] = strdup(objects[i].c_str())) == nullptr) {
+ std::cerr << "azure_object_list_add: Unable to allocate memory for object string."
+ << std::endl;
+ return ENOMEM;
+ }
+ }
+
+ *dirlistp = entries;
return 0;
}
@@ -346,25 +392,37 @@ azure_file_system_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
return 0;
}
-// Check if the object (file) exists in the Azure storage source.
+// Check if the object exists in the Azure storage source.
static int
-azure_file_exists(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp)
+azure_file_system_exists(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp)
{
azure_file_system *azure_fs = reinterpret_cast<azure_file_system *>(file_system);
- int ret;
size_t size = 0;
WT_UNUSED(session);
WT_UNUSED(size);
+ WT_DECL_RET;
- if ((ret = azure_fs->azure_conn->object_exists(std::string(name), *existp, size)) != 0) {
- std::cerr << "azure_file_open: object_exists request to Azure failed." << std::endl;
- return ret;
- }
+ std::cout << "azure_file_system_exists: Checking object: " << name << " exists in Azure."
+ << std::endl;
+ // Check whether the object exists in the cloud.
+ WT_ERR(azure_fs->azure_conn->object_exists(name, *existp, size));
+ if (!*existp) {
+ std::cout << "azure_file_system_exists: Object: " << name << " does not exist in Azure."
+ << std::endl;
+ } else
+ std::cout << "azure_file_system_exists: Object: " << name << " exists in Azure."
+ << std::endl;
return 0;
+
+err:
+ std::cerr << "azure_file_system_exists: Error with searching for object: " << name << std::endl;
+ return ret;
}
+// POSIX remove, not supported for cloud objects.
static int
azure_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, uint32_t flags)
{
@@ -373,9 +431,11 @@ azure_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name,
WT_UNUSED(name);
WT_UNUSED(flags);
- return 0;
+ std::cerr << "azure_remove: Object: " << name << ": remove of file not supported." << std::endl;
+ return ENOTSUP;
}
+// POSIX rename, not supported for cloud objects.
static int
azure_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, const char *to,
uint32_t flags)
@@ -386,9 +446,11 @@ azure_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from,
WT_UNUSED(to);
WT_UNUSED(flags);
- return 0;
+ std::cerr << "azure_rename: Object: " << from << ": rename of file not supported." << std::endl;
+ return ENOTSUP;
}
+// Get the size of a file in bytes, by file name.
static int
azure_object_size(
WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep)
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 7a0aa78bdce..a75057721eb 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "6e87e63889b2245ce883d5a55323a1c76c3a2d81"
+ "commit": "d0ff1fd21fe69a00bb5a793c4a51e9747ae1e51d"
}
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered19.py b/src/third_party/wiredtiger/test/suite/test_tiered19.py
index 75f6451b6d5..0f72987f554 100644
--- a/src/third_party/wiredtiger/test/suite/test_tiered19.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered19.py
@@ -170,8 +170,12 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin):
ss.ss_customize_file_system(
session, self.bucket, None, self.get_fs_config(prefix_2))
- # Check fs exist for non-existing object.
- self.assertFalse(azure_fs.fs_exist(session, 'foobar'))
+ # The object doesn't exist yet.
+ try:
+ exists = azure_fs.fs_exist(session, 'foobar')
+ except:
+ self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1)
+ self.assertFalse(exists)
# We cannot use the file system to create files, it is readonly.
# So use python I/O to build up the file.
@@ -179,10 +183,25 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin):
outbytes = ('MORE THAN ENOUGH DATA\n'*100000).encode()
f.write(outbytes)
+ # The object still doesn't exist yet.
+ try:
+ exists = azure_fs.fs_exist(session, 'foobar')
+ except:
+ self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1)
+ self.assertFalse(exists)
+
# Flush valid file into Azure.
self.assertEqual(ss.ss_flush(session, azure_fs, 'foobar', 'foobar', None), 0)
# Check that file exists in Azure.
self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'foobar', 'foobar', None), 0)
+
+ # The object exists now.
+ self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar'])
+ try:
+ exists = azure_fs.fs_exist(session, 'foobar')
+ except:
+ self.assertEquals(azure_fs.fs_exist(session, 'foobar'), -1)
+ self.assertTrue(exists)
# Check file system exists for an existing object.
self.assertTrue(azure_fs.fs_exist(session, 'foobar'))
@@ -230,8 +249,71 @@ class test_tiered19(wttest.WiredTigerTestCase, TieredConfigMixin):
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
lambda: ss.ss_flush(session, azure_fs, 'non_existing_file', 'non_existing_file', None), err_msg)
# Check that file does not exist in Azure.
+ self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'non_existing_file', 'non_existing_file', None), 0)
+
+ # Test that the no new objects exist after failed flush.
+ self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar'])
+
+ err_msg = '/Exception: Operation not supported/'
+
+ # Test that POSIX Remove and Rename are not supported.
+ self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
+ lambda: azure_fs.fs_remove(session, 'foobar', 0), err_msg)
+ self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar'])
+
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda: ss.ss_flush_finish(session, azure_fs, 'non_existing_file', 'non_existing_file', None), err_msg)
+ lambda: azure_fs.fs_rename(session, 'foobar', 'foobar2', 0), err_msg)
+ self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar'])
+
+ # Flush second valid file into Azure.
+ self.assertEqual(ss.ss_flush(session, azure_fs, 'foobar', 'foobar2', None), 0)
+ # Check that second file exists in Azure.
+ self.assertEqual(ss.ss_flush_finish(session, azure_fs, 'foobar', 'foobar2', None), 0)
+
+ # Directory list should show 2 objects in Azure.
+ self.assertEquals(azure_fs.fs_directory_list(session, '', prefix_1), [prefix_1 + 'foobar', prefix_1 + 'foobar2'])
+
+ # Directory list single should show 1 object.
+ self.assertEquals(azure_fs.fs_directory_list_single(session, '', prefix_1), [prefix_1 + 'foobar'])
+
+ # Open existing file in the cloud. Only one active file handle exists for each open file.
+ # A reference count keeps track of open file instances so we can get a pointer to the same
+ # file handle as long as there are more open file calls than close file calls (i.e. reference
+ # count is greater than 0).
+ fh_1 = azure_fs.fs_open_file(session, 'foobar', file_system.open_file_type_data, file_system.open_readonly)
+ assert(fh_1 != None)
+ fh_2 = azure_fs.fs_open_file(session, 'foobar', file_system.open_file_type_data, file_system.open_readonly)
+ assert(fh_2 != None)
+
+ # File handle lock call not used in Azure implementation.
+ self.assertEqual(fh_1.fh_lock(session, True), 0)
+ self.assertEqual(fh_1.fh_lock(session, False), 0)
+
+ # Read using a valid file handle.
+ inbytes_1 = bytes(1000000)
+ self.assertEqual(fh_1.fh_read(session, 0, inbytes_1), 0)
+ self.assertEquals(outbytes[0:1000000], inbytes_1)
+
+ # Close a valid file handle.
+ self.assertEqual(fh_1.close(session), 0)
+
+ # Read using a valid file handle.
+ inbytes_2 = bytes(1000000)
+ self.assertEqual(fh_2.fh_read(session, 0, inbytes_2), 0)
+ self.assertEquals(outbytes[0:1000000], inbytes_2)
+
+ # File size succeeds.
+ self.assertEqual(fh_1.fh_size(session), 2200000)
+
+ # Close a valid file handle.
+ self.assertEquals(fh_2.close(session), 0)
+
+ # Test that opening invalid file fails.
+ bad_file = 'bad_file'
+ err_msg = '/Exception: Invalid argument/'
+ self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
+ lambda: azure_fs.fs_open_file(session, bad_file,
+ file_system.open_file_type_data,file_system.open_readonly), err_msg)
# Test that azure file system terminate succeeds.
self.assertEqual(azure_fs.terminate(session), 0)