diff options
author | Luke Chen <luke.chen@mongodb.com> | 2022-02-22 13:40:09 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-22 03:06:00 +0000 |
commit | bd27885ec38c93f19c3814e4e42b9e88ad1f5510 (patch) | |
tree | a0c0c5b0dae338624c31c0b176402d245ee8ef2b | |
parent | a93d373852a7c44aac99af4fe9b277eeac26d398 (diff) | |
download | mongo-bd27885ec38c93f19c3814e4e42b9e88ad1f5510.tar.gz |
Import wiredtiger: 3e81642120afab735f116d2a5a0fce811795ac2a from branch mongodb-5.3
ref: f279f2e419..3e81642120
for: 5.3.0-rc1
WT-8821 Add a get object size method in S3Connection and implement file handle and file system size
6 files changed, 76 insertions, 24 deletions
diff --git a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.cpp b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.cpp index d2e37a6ecd8..9681fa89057 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.cpp +++ b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.cpp @@ -134,12 +134,14 @@ S3Connection::GetObject(const std::string &objectKey, const std::string &path) c /* * ObjectExists -- - * Checks whether an object with the given key exists in the S3 bucket. + * Checks whether an object with the given key exists in the S3 bucket and also retrieves + * size of the object. */ int -S3Connection::ObjectExists(const std::string &objectKey, bool &exists) const +S3Connection::ObjectExists(const std::string &objectKey, bool &exists, size_t &objectSize) const { exists = false; + objectSize = 0; Aws::S3Crt::Model::HeadObjectRequest request; request.SetBucket(_bucketName); @@ -152,6 +154,7 @@ S3Connection::ObjectExists(const std::string &objectKey, bool &exists) const */ if (outcome.IsSuccess()) { exists = true; + objectSize = outcome.GetResult().GetContentLength(); return (0); } else if (outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) return (0); diff --git a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.h b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.h index 297f5674ba5..914c8742ee1 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.h +++ b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_connection.h @@ -20,7 +20,7 @@ class S3Connection { uint32_t batchSize = 1000, bool listSingle = false) const; int PutObject(const std::string &objectKey, const std::string &fileName) const; int DeleteObject(const std::string &objectKey) const; - int ObjectExists(const std::string &objectKey, bool &exists) const; + int ObjectExists(const std::string &objectKey, bool &exists, size_t &objectSize) const; int GetObject(const std::string &objectKey, const std::string &path) const; ~S3Connection() = default; diff --git a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_storage_source.cpp b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_storage_source.cpp index a2a0842c927..4ecf1bb1509 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_storage_source.cpp +++ b/src/third_party/wiredtiger/ext/storage_sources/s3_store/s3_storage_source.cpp @@ -133,6 +133,9 @@ static int S3ObjectListFree(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); static void S3ShowStatistics(const S3_STATISTICS &); static int S3FileClose(WT_FILE_HANDLE *, WT_SESSION *); +static int S3FileSize(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); +static int S3Size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); + /* * S3Path -- * Construct a pathname from the directory and the object name. @@ -160,6 +163,7 @@ S3Path(const std::string &dir, const std::string &name) static int S3Exist(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char *name, bool *exist) { + size_t objectSize; S3_FILE_SYSTEM *fs = (S3_FILE_SYSTEM *)fileSystem; int ret = 0; @@ -170,7 +174,7 @@ S3Exist(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char *name, bool /* It's not in the cache, try the S3 bucket. */ FS2S3(fileSystem)->statistics.objectExistsCount++; - if ((ret = fs->connection->ObjectExists(name, *exist)) != 0) + if ((ret = fs->connection->ObjectExists(name, *exist, objectSize)) != 0) std::cerr << "S3Exist: ObjectExists request to S3 failed." << std::endl; return (ret); } @@ -218,7 +222,7 @@ S3GetDirectory(const std::string &home, const std::string &name, bool create, st ret = stat(dirName.c_str(), &sb); if (ret != 0 && errno == ENOENT && create) { - (void)mkdir(dirName.c_str(), 0777); + mkdir(dirName.c_str(), 0777); ret = stat(dirName.c_str(), &sb); } @@ -240,24 +244,23 @@ S3FileClose(WT_FILE_HANDLE *fileHandle, WT_SESSION *session) { int ret = 0; S3_FILE_HANDLE *s3FileHandle = (S3_FILE_HANDLE *)fileHandle; - S3_STORAGE *storage = s3FileHandle->storage; + S3_STORAGE *s3 = s3FileHandle->storage; WT_FILE_HANDLE *wtFileHandle = s3FileHandle->wtFileHandle; /* * We require exclusive access to the list of file handles when removing file handles. The * lock_guard will be unlocked automatically once the scope is exited. */ { - std::lock_guard<std::mutex> lock(storage->fhMutex); - storage->fhList.remove(s3FileHandle); + std::lock_guard<std::mutex> lock(s3->fhMutex); + s3->fhList.remove(s3FileHandle); } if (wtFileHandle != NULL) { - storage->statistics.fhOps++; + s3->statistics.fhOps++; ret = wtFileHandle->close(wtFileHandle, session); } free(s3FileHandle->iface.name); free(s3FileHandle); - return (ret); } @@ -328,7 +331,7 @@ S3Open(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char *name, fileHandle->fh_map_preload = NULL; fileHandle->fh_unmap = NULL; fileHandle->fh_read = S3FileRead; - fileHandle->fh_size = NULL; + fileHandle->fh_size = S3FileSize; fileHandle->fh_sync = NULL; fileHandle->fh_sync_nowait = NULL; fileHandle->fh_truncate = NULL; @@ -354,6 +357,27 @@ S3Open(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char *name, } /* + * S3Size -- + * Get the size of a file in bytes, by file name. + */ +static int +S3Size(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char *name, wt_off_t *sizep) +{ + S3_STORAGE *s3 = FS2S3(fileSystem); + size_t objectSize; + bool exist; + *sizep = 0; + int ret; + + S3_FILE_SYSTEM *fs = (S3_FILE_SYSTEM *)fileSystem; + s3->statistics.objectExistsCount++; + if ((ret = fs->connection->ObjectExists(name, exist, objectSize)) != 0) + return (ret); + *sizep = objectSize; + return (ret); +} + +/* * S3FileRead -- * Read a file using WiredTiger's native file handle read. */ @@ -361,16 +385,30 @@ static int S3FileRead(WT_FILE_HANDLE *fileHandle, WT_SESSION *session, wt_off_t offset, size_t len, void *buf) { S3_FILE_HANDLE *s3FileHandle = (S3_FILE_HANDLE *)fileHandle; - S3_STORAGE *storage = s3FileHandle->storage; + S3_STORAGE *s3 = s3FileHandle->storage; WT_FILE_HANDLE *wtFileHandle = s3FileHandle->wtFileHandle; int ret; - storage->statistics.fhReadOps++; + s3->statistics.fhReadOps++; if ((ret = wtFileHandle->fh_read(wtFileHandle, session, offset, len, buf)) != 0) std::cerr << "S3FileRead: fh_read failed." << std::endl; return (ret); } /* + * S3FileSize -- + * Get the size of a file in bytes, by file handle. + */ +static int +S3FileSize(WT_FILE_HANDLE *fileHandle, WT_SESSION *session, wt_off_t *sizep) +{ + S3_FILE_HANDLE *s3FileHandle = (S3_FILE_HANDLE *)fileHandle; + S3_STORAGE *s3 = s3FileHandle->storage; + WT_FILE_HANDLE *wtFileHandle = s3FileHandle->wtFileHandle; + s3->statistics.fhOps++; + return (wtFileHandle->fh_size(wtFileHandle, session, sizep)); +} + +/* * S3CustomizeFileSystem -- * Return a customized file system to access the s3 storage source objects. */ @@ -456,6 +494,7 @@ S3CustomizeFileSystem(WT_STORAGE_SOURCE *storageSource, WT_SESSION *session, con fs->fileSystem.terminate = S3FileSystemTerminate; fs->fileSystem.fs_exist = S3Exist; fs->fileSystem.fs_open_file = S3Open; + fs->fileSystem.fs_size = S3Size; /* Add to the list of the active file systems. Lock will be freed when the scope is exited. */ { @@ -570,8 +609,8 @@ S3ObjectListSingle(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, const char * static int S3ObjectListFree(WT_FILE_SYSTEM *fileSystem, WT_SESSION *session, char **objectList, uint32_t count) { - (void)fileSystem; - (void)session; + UNUSED(fileSystem); + UNUSED(session); if (objectList != NULL) { while (count > 0) @@ -708,7 +747,7 @@ S3ShowStatistics(const S3_STATISTICS &statistics) { std::cout << "S3 list objects count: " << statistics.listObjectsCount << std::endl; std::cout << "S3 put object count: " << statistics.putObjectCount << std::endl; - std::cout << "S3 get object count: " << statistics.putObjectCount << std::endl; + std::cout << "S3 get object count: " << statistics.getObjectCount << std::endl; std::cout << "S3 object exists count: " << statistics.objectExistsCount << std::endl; std::cout << "Non read/write file handle operations: " << statistics.fhOps << std::endl; diff --git a/src/third_party/wiredtiger/ext/storage_sources/s3_store/test/test_s3_connection.cpp b/src/third_party/wiredtiger/ext/storage_sources/s3_store/test/test_s3_connection.cpp index b53783038ed..a87cc2b88f2 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/s3_store/test/test_s3_connection.cpp +++ b/src/third_party/wiredtiger/ext/storage_sources/s3_store/test/test_s3_connection.cpp @@ -282,7 +282,7 @@ TestGetObject(const Aws::S3Crt::ClientConfiguration &config) } /* * TestObjectExists -- - * Unit test to check if an object exists in an AWS bucket. + * Unit test to check if an object exists in an AWS bucket and size of the object is correct. */ int TestObjectExists(const Aws::S3Crt::ClientConfiguration &config) @@ -290,25 +290,34 @@ TestObjectExists(const Aws::S3Crt::ClientConfiguration &config) S3Connection conn(config, TestDefaults::bucketName, TestDefaults::objPrefix); bool exists = false; int ret = TEST_FAILURE; + size_t objectSize; const std::string objectName = "test_object"; const std::string fileName = "test_object.txt"; /* Create a file to upload to the bucket.*/ std::ofstream File(fileName); - File << "Test payload"; + std::string payload = "Test payload"; + File << payload; File.close(); - ret = conn.ObjectExists(objectName, exists); - if (ret != 0 || exists) + if ((ret = conn.ObjectExists(objectName, exists, objectSize)) != 0) + return (ret); + if (exists || objectSize != 0) return (TEST_FAILURE); if ((ret = conn.PutObject(objectName, fileName)) != 0) return (ret); - ret = conn.ObjectExists(objectName, exists); - if (ret != 0 || !exists) + if ((ret = conn.ObjectExists(objectName, exists, objectSize)) != 0) + return (ret); + if (!exists) + return (TEST_FAILURE); + + if (objectSize != payload.length()) { + std::cerr << "TestObjectExist().objectSize failed." << std::endl; return (TEST_FAILURE); + } if ((ret = conn.DeleteObject(objectName)) != 0) return (ret); diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index cddef996df3..d128bab6d3f 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.3", - "commit": "f279f2e4198b8740d6f49b266440f2cf9d18a35d" + "commit": "3e81642120afab735f116d2a5a0fce811795ac2a" } diff --git a/src/third_party/wiredtiger/test/suite/test_s3_store01.py b/src/third_party/wiredtiger/test/suite/test_s3_store01.py index ed4a98b1f2e..d9c55cc54aa 100644 --- a/src/third_party/wiredtiger/test/suite/test_s3_store01.py +++ b/src/third_party/wiredtiger/test/suite/test_s3_store01.py @@ -80,12 +80,13 @@ class test_s3_store01(wttest.WiredTigerTestCase): inbytes = bytes(1000000) # An empty buffer with a million zero bytes. fh.fh_read(session, 0, inbytes) # Read into the buffer. self.assertEquals(outbytes[0:1000000], inbytes) + self.assertTrue(fs.fs_size(session, filename), len(outbytes)) + self.assertEquals(fh.fh_size(session), len(outbytes)) fh.close(session) # Checking that the file still exists in S3 after removing it from the cache. os.remove(cache_prefix + self.bucket_name + '/' + filename) self.assertTrue(fs.fs_exist(session, filename)) - file_list = [self.prefix + object_name] self.assertEquals(fs.fs_directory_list(session, None, None), file_list) |