summaryrefslogtreecommitdiff
path: root/qpid/cpp/src/qpid/linearstore/journal
diff options
context:
space:
mode:
Diffstat (limited to 'qpid/cpp/src/qpid/linearstore/journal')
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h133
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp45
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/Checksum.h54
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp477
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h118
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp211
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h83
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp199
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h82
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h57
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/JournalFile.cpp349
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/JournalFile.h132
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp63
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/JournalLog.h60
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp243
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h119
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp949
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h157
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/aio.h201
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/aio_callback.h44
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp136
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/data_tok.h133
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp313
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/deq_rec.h70
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp181
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/enq_map.h101
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp397
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/enq_rec.h74
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/enums.h58
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jcfg.h72
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp440
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jcntl.h570
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jdir.cpp457
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jdir.h362
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp236
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jerrno.h157
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jexception.cpp168
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jexception.h125
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/jrec.h122
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp192
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/pmgr.h119
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/slock.h71
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/smutex.h51
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp41
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/time_ns.h92
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/txn_map.cpp263
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/txn_map.h150
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp305
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/txn_rec.h68
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c46
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h83
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c63
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h83
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c115
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h111
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c51
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h72
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c46
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h82
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c33
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h72
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp1086
-rw-r--r--qpid/cpp/src/qpid/linearstore/journal/wmgr.h156
63 files changed, 11399 insertions, 0 deletions
diff --git a/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h b/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h
new file mode 100644
index 0000000000..73e5fecf93
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h
@@ -0,0 +1,133 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_
+#define QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_
+
+#include "qpid/linearstore/journal/slock.h"
+#include <string>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+template <class T>
+class AtomicCounter
+{
+private:
+ std::string id_;
+ T count_;
+ mutable smutex countMutex;
+
+public:
+ AtomicCounter(const std::string& id, const T& initValue) : id_(id), count_(initValue) {}
+
+ virtual ~AtomicCounter() {}
+
+ T get() const {
+ slock l(countMutex);
+ return count_;
+ }
+
+ void set(const T v) {
+ slock l(countMutex);
+ count_ = v;
+ }
+
+ T increment() {
+ slock l(countMutex);
+ return ++count_;
+ }
+
+ T add(const T& a) {
+ slock l(countMutex);
+ count_ += a;
+ return count_;
+ }
+
+ T addLimit(const T& a, const T& limit, const uint32_t jerr) {
+ slock l(countMutex);
+ if (count_ + a > limit) throw jexception(jerr, id_, "AtomicCounter", "addLimit");
+ count_ += a;
+ return count_;
+ }
+
+ T decrement() {
+ slock l(countMutex);
+ return --count_;
+ }
+
+ T decrementLimit(const T& limit = T(0), const uint32_t jerr = jerrno::JERR__UNDERFLOW) {
+ slock l(countMutex);
+ if (count_ < limit + 1) {
+ throw jexception(jerr, id_, "AtomicCounter", "decrementLimit");
+ }
+ return --count_;
+ }
+
+ T subtract(const T& s) {
+ slock l(countMutex);
+ count_ -= s;
+ return count_;
+ }
+
+ T subtractLimit(const T& s, const T& limit = T(0), const uint32_t jerr = jerrno::JERR__UNDERFLOW) {
+ slock l(countMutex);
+ if (count_ < limit + s) throw jexception(jerr, id_, "AtomicCounter", "subtractLimit");
+ count_ -= s;
+ return count_;
+ }
+
+ bool operator==(const T& o) const {
+ slock l(countMutex);
+ return count_ == o;
+ }
+
+ bool operator<(const T& o) const {
+ slock l(countMutex);
+ return count_ < o;
+ }
+
+ bool operator<=(const T& o) const {
+ slock l(countMutex);
+ return count_ <= o;
+ }
+
+ friend T operator-(const T& a, const AtomicCounter& b) {
+ slock l(b.countMutex);
+ return a - b.count_;
+ }
+
+ friend T operator-(const AtomicCounter& a, const T& b) {
+ slock l(a.countMutex);
+ return a.count_ - b;
+ }
+
+ friend T operator-(const AtomicCounter&a, const AtomicCounter& b) {
+ slock l1(a.countMutex);
+ slock l2(b.countMutex);
+ return a.count_ - b.count_;
+ }
+};
+
+}}} // namespace qpid::qls_jrnl
+
+#endif // QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp b/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp
new file mode 100644
index 0000000000..eaede12d8e
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp
@@ -0,0 +1,45 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/Checksum.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+Checksum::Checksum() : a(1UL), b(0UL), MOD_ADLER(65521UL) {}
+
+Checksum::~Checksum() {}
+
+void Checksum::addData(const unsigned char* data, const std::size_t len) {
+ if (data) {
+ for (uint32_t i = 0; i < len; i++) {
+ a = (a + data[i]) % MOD_ADLER;
+ b = (a + b) % MOD_ADLER;
+ }
+ }
+}
+
+uint32_t Checksum::getChecksum() {
+ return (b << 16) | a;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/Checksum.h b/qpid/cpp/src/qpid/linearstore/journal/Checksum.h
new file mode 100644
index 0000000000..d96aac2991
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/Checksum.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_
+#define QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+/*
+ * This checksum routine uses the Adler-32 algorithm as described in
+ * http://en.wikipedia.org/wiki/Adler-32. It is structured so that the
+ * data for which the checksum must be calculated can be added in several
+ * stages through the addData() function, and when complete, the checksum
+ * is obtained through a call to getChecksum().
+ */
+class Checksum
+{
+private:
+ uint32_t a;
+ uint32_t b;
+ const uint32_t MOD_ADLER;
+public:
+ Checksum();
+ virtual ~Checksum();
+ void addData(const unsigned char* data, const std::size_t len);
+ uint32_t getChecksum();
+};
+
+}}}
+
+#endif // QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp
new file mode 100644
index 0000000000..08db3f75bd
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp
@@ -0,0 +1,477 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "EmptyFilePool.h"
+
+#include <fstream>
+#include "qpid/linearstore/journal/EmptyFilePoolPartition.h"
+#include "qpid/linearstore/journal/jcfg.h"
+#include "qpid/linearstore/journal/jdir.h"
+#include "qpid/linearstore/journal/JournalLog.h"
+#include "qpid/linearstore/journal/slock.h"
+#include "qpid/linearstore/journal/utils/file_hdr.h"
+#include "qpid/types/Uuid.h"
+#include <sys/stat.h>
+#include <unistd.h>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// static
+std::string EmptyFilePool::s_inuseFileDirectory_ = "in_use";
+
+// static
+std::string EmptyFilePool::s_returnedFileDirectory_ = "returned";
+
+EmptyFilePool::EmptyFilePool(const std::string& efpDirectory,
+ const EmptyFilePoolPartition* partitionPtr,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef) :
+ efpDirectory_(efpDirectory),
+ efpDataSize_kib_(dataSizeFromDirName_kib(efpDirectory, partitionPtr->getPartitionNumber())),
+ partitionPtr_(partitionPtr),
+ overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag),
+ truncateFlag_(truncateFlag),
+ journalLogRef_(journalLogRef)
+{}
+
+EmptyFilePool::~EmptyFilePool() {}
+
+void EmptyFilePool::initialize() {
+ if (::mkdir(efpDirectory_.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { // Create EFP dir if it does not yet exist
+ if (errno != EEXIST) {
+ std::ostringstream oss;
+ oss << "directory=" << efpDirectory_ << " " << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_EFP_MKDIR, oss.str(), "EmptyFilePool", "initialize");
+ }
+ }
+
+ // Process empty files in main dir
+ std::vector<std::string> dirList;
+ jdir::read_dir(efpDirectory_, dirList, false, true, false, false);
+ for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) {
+ size_t dotPos = i->rfind(".");
+ if (dotPos != std::string::npos) {
+ if (i->substr(dotPos).compare(".jrnl") == 0 && i->length() == 41) {
+ std::string emptyFileName(efpDirectory_ + "/" + (*i));
+ if (validateEmptyFile(emptyFileName)) {
+ pushEmptyFile(emptyFileName);
+ }
+ }
+ }
+ }
+
+ // Create 'in_use' and 'returned' subdirs if they don't already exist
+ // Retern files to EFP in 'in_use' and 'returned' subdirs if they do exist
+ initializeSubDirectory(efpDirectory_ + "/" + s_inuseFileDirectory_);
+ initializeSubDirectory(efpDirectory_ + "/" + s_returnedFileDirectory_);
+}
+
+efpDataSize_kib_t EmptyFilePool::dataSize_kib() const {
+ return efpDataSize_kib_;
+}
+
+efpFileSize_kib_t EmptyFilePool::fileSize_kib() const {
+ return efpDataSize_kib_ + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB);
+}
+
+efpDataSize_sblks_t EmptyFilePool::dataSize_sblks() const {
+ return efpDataSize_kib_ / QLS_SBLK_SIZE_KIB;
+}
+
+efpFileSize_sblks_t EmptyFilePool::fileSize_sblks() const {
+ return (efpDataSize_kib_ / QLS_SBLK_SIZE_KIB) + QLS_JRNL_FHDR_RES_SIZE_SBLKS;
+}
+
+efpFileCount_t EmptyFilePool::numEmptyFiles() const {
+ slock l(emptyFileListMutex_);
+ return efpFileCount_t(emptyFileList_.size());
+}
+
+efpDataSize_kib_t EmptyFilePool::cumFileSize_kib() const {
+ slock l(emptyFileListMutex_);
+ return efpDataSize_kib_t(emptyFileList_.size()) * efpDataSize_kib_;
+}
+
+efpPartitionNumber_t EmptyFilePool::getPartitionNumber() const {
+ return partitionPtr_->getPartitionNumber();
+}
+
+const EmptyFilePoolPartition* EmptyFilePool::getPartition() const {
+ return partitionPtr_;
+}
+
+const efpIdentity_t EmptyFilePool::getIdentity() const {
+ return efpIdentity_t(partitionPtr_->getPartitionNumber(), efpDataSize_kib_);
+}
+
+std::string EmptyFilePool::takeEmptyFile(const std::string& destDirectory) {
+ std::string emptyFileName = popEmptyFile();
+ std::string newFileName = efpDirectory_ + "/" + s_inuseFileDirectory_ + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/'
+ std::string symlinkName = destDirectory + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/'
+ if (!moveFile(emptyFileName, newFileName)) {
+ // Try again with new UUID for file name
+ newFileName = efpDirectory_ + "/" + s_inuseFileDirectory_ + "/" + getEfpFileName();
+ if (!moveFile(emptyFileName, newFileName)) {
+//std::cerr << "*** DEBUG: pushEmptyFile " << emptyFileName << "from EmptyFilePool::takeEmptyFile()" << std::endl; // DEBUG
+ pushEmptyFile(emptyFileName); // Return empty file to pool
+ std::ostringstream oss;
+ oss << "file=\"" << emptyFileName << "\" dest=\"" << newFileName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "EmptyFilePool", "takeEmptyFile");
+ }
+ }
+ if (createSymLink(newFileName, symlinkName)) {
+ std::ostringstream oss;
+ oss << "file=\"" << emptyFileName << "\" dest=\"" << newFileName << "\" symlink=\"" << symlinkName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "takeEmptyFile");
+ }
+ return symlinkName;
+}
+
+void EmptyFilePool::returnEmptyFileSymlink(const std::string& emptyFileSymlink) {
+ if (isFile(emptyFileSymlink)) {
+ returnEmptyFile(emptyFileSymlink);
+ } else if(isSymlink(emptyFileSymlink)) {
+ returnEmptyFile(deleteSymlink(emptyFileSymlink));
+ } else {
+ std::ostringstream oss;
+ oss << "File \"" << emptyFileSymlink << "\" is neither a file nor a symlink";
+ throw jexception(jerrno::JERR_EFP_BADFILETYPE, oss.str(), "EmptyFilePool", "returnEmptyFileSymlink");
+ }
+}
+
+//static
+std::string EmptyFilePool::dirNameFromDataSize(const efpDataSize_kib_t efpDataSize_kib) {
+ std::ostringstream oss;
+ oss << efpDataSize_kib << "k";
+ return oss.str();
+}
+
+
+// static
+efpDataSize_kib_t EmptyFilePool::dataSizeFromDirName_kib(const std::string& dirName,
+ const efpPartitionNumber_t partitionNumber) {
+ // Check for dirName format 'NNNk', where NNN is a number, convert NNN into an integer. NNN cannot be 0.
+ std::string n(dirName.substr(dirName.rfind('/')+1));
+ bool valid = true;
+ for (uint16_t charNum = 0; charNum < n.length(); ++charNum) {
+ if (charNum < n.length()-1) {
+ if (!::isdigit((int)n[charNum])) {
+ valid = false;
+ break;
+ }
+ } else {
+ valid = n[charNum] == 'k';
+ }
+ }
+ efpDataSize_kib_t s = ::atol(n.c_str());
+ if (!valid || s == 0 || s % QLS_SBLK_SIZE_KIB != 0) {
+ std::ostringstream oss;
+ oss << "Partition: " << partitionNumber << "; EFP directory: \'" << n << "\'";
+ throw jexception(jerrno::JERR_EFP_BADEFPDIRNAME, oss.str(), "EmptyFilePool", "fileSizeKbFromDirName");
+ }
+ return s;
+}
+
+// --- protected functions ---
+void EmptyFilePool::checkIosState(std::ofstream& ofs,
+ const uint32_t jerrno,
+ const std::string& fqFileName,
+ const std::string& operation,
+ const std::string& errorMessage,
+ const std::string& className,
+ const std::string& fnName) {
+ if (!ofs.good()) {
+ if (ofs.is_open()) {
+ ofs.close();
+ }
+ std::ostringstream oss;
+ oss << "IO failure: eofbit=" << (ofs.eof()?"T":"F") << " failbit=" << (ofs.fail()?"T":"F") << " badbit="
+ << (ofs.bad()?"T":"F") << " file=" << fqFileName << " operation=" << operation << ": " << errorMessage;
+ throw jexception(jerrno, oss.str(), className, fnName);
+ }
+}
+
+std::string EmptyFilePool::createEmptyFile() {
+ std::string efpfn = getEfpFileName();
+ overwriteFileContents(efpfn);
+ return efpfn;
+}
+
+std::string EmptyFilePool::getEfpFileName() {
+ qpid::types::Uuid uuid(true);
+ std::ostringstream oss;
+ oss << efpDirectory_ << "/" << uuid << QLS_JRNL_FILE_EXTENSION;
+ return oss.str();
+}
+
+void EmptyFilePool::initializeSubDirectory(const std::string& fqDirName) {
+ std::vector<std::string> dirList;
+ if (jdir::exists(fqDirName)) {
+ if (truncateFlag_) {
+ jdir::read_dir(fqDirName, dirList, false, true, false, false);
+ for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) {
+ size_t dotPos = i->rfind(".");
+ if (i->substr(dotPos).compare(".jrnl") == 0 && i->length() == 41) {
+ returnEmptyFile(fqDirName + "/" + (*i));
+ } else {
+ std::ostringstream oss;
+ oss << "File \'" << *i << "\' was not a journal file and was not returned to EFP.";
+ journalLogRef_.log(JournalLog::LOG_WARN, oss.str());
+ }
+ }
+ }
+ } else {
+ jdir::create_dir(fqDirName);
+ }
+}
+
+void EmptyFilePool::overwriteFileContents(const std::string& fqFileName) {
+ ::file_hdr_t fh;
+ ::file_hdr_create(&fh, QLS_FILE_MAGIC, QLS_JRNL_VERSION, QLS_JRNL_FHDR_RES_SIZE_SBLKS, partitionPtr_->getPartitionNumber(), efpDataSize_kib_);
+ std::ofstream ofs(fqFileName.c_str(), std::ofstream::out | std::ofstream::binary);
+ checkIosState(ofs, jerrno::JERR_EFP_FOPEN, fqFileName, "constructor", "Failed to create file", "EmptyFilePool", "overwriteFileContents");
+ ofs.write((char*)&fh, sizeof(::file_hdr_t));
+ checkIosState(ofs, jerrno::JERR_EFP_FWRITE, fqFileName, "write()", "Failed to write header", "EmptyFilePool", "overwriteFileContents");
+ uint64_t rem = ((efpDataSize_kib_ + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB)) * 1024) - sizeof(::file_hdr_t);
+ while (rem--) {
+ ofs.put('\0');
+ checkIosState(ofs, jerrno::JERR_EFP_FWRITE, fqFileName, "put()", "Failed to put \0", "EmptyFilePool", "overwriteFileContents");
+ }
+ ofs.close();
+//std::cout << "*** WARNING: EFP " << efpDirectory_ << " is empty - created new journal file " << fqFileName.substr(fqFileName.rfind('/') + 1) << " on the fly" << std::endl; // DEBUG
+}
+
+std::string EmptyFilePool::popEmptyFile() {
+ std::string emptyFileName;
+ bool listEmptyFlag;
+ {
+ slock l(emptyFileListMutex_);
+ listEmptyFlag = emptyFileList_.empty();
+ if (!listEmptyFlag) {
+ emptyFileName = emptyFileList_.front();
+ emptyFileList_.pop_front();
+ }
+ }
+ // If the list is empty, create a new file and return the file name.
+ if (listEmptyFlag) {
+ emptyFileName = createEmptyFile();
+ }
+ return emptyFileName;
+}
+
+void EmptyFilePool::pushEmptyFile(const std::string fqFileName) {
+ slock l(emptyFileListMutex_);
+ emptyFileList_.push_back(fqFileName);
+}
+
+void EmptyFilePool::returnEmptyFile(const std::string& emptyFileName) {
+ std::string returnedFileName = efpDirectory_ + "/" + s_returnedFileDirectory_ + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/'
+ if (!moveFile(emptyFileName, returnedFileName)) {
+ ::unlink(emptyFileName.c_str());
+//std::cerr << "*** WARNING: Unable to move file " << emptyFileName << " to " << returnedFileName << "; deleted." << std::endl; // DEBUG
+ }
+
+ // TODO: On a separate thread, process returned files by overwriting headers and, optionally, their contents and
+ // returning them to the EFP directory
+ resetEmptyFileHeader(returnedFileName);
+ if (overwriteBeforeReturnFlag_) {
+ overwriteFileContents(returnedFileName);
+ }
+ std::string sanitizedEmptyFileName = efpDirectory_ + returnedFileName.substr(returnedFileName.rfind('/')); // NOTE: substr() includes leading '/'
+ if (!moveFile(returnedFileName, sanitizedEmptyFileName)) {
+ ::unlink(returnedFileName.c_str());
+//std::cerr << "*** WARNING: Unable to move file " << returnedFileName << " to " << sanitizedEmptyFileName << "; deleted." << std::endl; // DEBUG
+ } else {
+ pushEmptyFile(sanitizedEmptyFileName);
+ }
+}
+
+void EmptyFilePool::resetEmptyFileHeader(const std::string& fqFileName) {
+ std::fstream fs(fqFileName.c_str(), std::fstream::in | std::fstream::out | std::fstream::binary);
+ if (fs.good()) {
+ const std::streamsize buffsize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES;
+ char buff[buffsize];
+ fs.read((char*)buff, buffsize);
+ std::streampos bytesRead = fs.tellg();
+ if (std::streamoff(bytesRead) == buffsize) {
+ ::file_hdr_reset((::file_hdr_t*)buff);
+ // set rest of buffer to 0
+ ::memset(buff + sizeof(::file_hdr_t), 0, MAX_FILE_HDR_LEN - sizeof(::file_hdr_t));
+ fs.seekp(0, std::fstream::beg);
+ fs.write(buff, buffsize);
+ std::streampos bytesWritten = fs.tellp();
+ if (std::streamoff(bytesWritten) != buffsize) {
+//std::cerr << "*** ERROR: Unable to write file header of file \"" << fqFileName << "\": tried to write " << buffsize << " bytes; wrote " << bytesWritten << " bytes." << std::endl; // DEBUG
+ }
+ } else {
+//std::cerr << "*** ERROR: Unable to read file header of file \"" << fqFileName << "\": tried to read " << sizeof(::file_hdr_t) << " bytes; read " << bytesRead << " bytes." << std::endl; // DEBUG
+ }
+ fs.close();
+ } else {
+//std::cerr << "*** ERROR: Unable to open file \"" << fqFileName << "\" for reading" << std::endl; // DEBUG
+ }
+}
+
+bool EmptyFilePool::validateEmptyFile(const std::string& emptyFileName) const {
+ std::ostringstream oss;
+ struct stat s;
+ if (::stat(emptyFileName.c_str(), &s))
+ {
+ oss << "stat: file=\"" << emptyFileName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "EmptyFilePool", "validateEmptyFile");
+ }
+
+ // Size matches pool
+ efpDataSize_kib_t expectedSize = (QLS_SBLK_SIZE_KIB + efpDataSize_kib_) * 1024;
+ if ((efpDataSize_kib_t)s.st_size != expectedSize) {
+ oss << "ERROR: File " << emptyFileName << ": Incorrect size: Expected=" << expectedSize
+ << "; actual=" << s.st_size;
+ journalLogRef_.log(JournalLog::LOG_ERROR, oss.str());
+ return false;
+ }
+
+ // Open file and read header
+ std::fstream fs(emptyFileName.c_str(), std::fstream::in | std::fstream::out | std::fstream::binary);
+ if (!fs) {
+ oss << "ERROR: File " << emptyFileName << ": Unable to open for reading";
+ journalLogRef_.log(JournalLog::LOG_ERROR, oss.str());
+ return false;
+ }
+ const std::streamsize buffsize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES;
+ char buff[buffsize];
+ fs.read((char*)buff, buffsize);
+ std::streampos bytesRead = fs.tellg();
+ if (std::streamoff(bytesRead) != buffsize) {
+ oss << "ERROR: Unable to read file header of file \"" << emptyFileName << "\": tried to read "
+ << buffsize << " bytes; read " << bytesRead << " bytes";
+ journalLogRef_.log(JournalLog::LOG_ERROR, oss.str());
+ fs.close();
+ return false;
+ }
+
+ // Check file header
+ ::file_hdr_t* fhp = (::file_hdr_t*)buff;
+ const bool jrnlMagicError = fhp->_rhdr._magic != QLS_FILE_MAGIC;
+ const bool jrnlVersionError = fhp->_rhdr._version != QLS_JRNL_VERSION;
+ const bool jrnlPartitionError = fhp->_efp_partition != partitionPtr_->getPartitionNumber();
+ const bool jrnlFileSizeError = fhp->_data_size_kib != efpDataSize_kib_;
+ if (jrnlMagicError || jrnlVersionError || jrnlPartitionError || jrnlFileSizeError)
+ {
+ oss << "ERROR: File " << emptyFileName << ": Invalid file header - mismatched header fields: " <<
+ (jrnlMagicError ? "magic " : "") <<
+ (jrnlVersionError ? "version " : "") <<
+ (jrnlPartitionError ? "partition" : "") <<
+ (jrnlFileSizeError ? "file-size" : "");
+ journalLogRef_.log(JournalLog::LOG_ERROR, oss.str());
+ fs.close();
+ return false;
+ }
+
+ // Check file header is reset
+ if (!::is_file_hdr_reset(fhp)) {
+ ::file_hdr_reset(fhp);
+ ::memset(buff + sizeof(::file_hdr_t), 0, MAX_FILE_HDR_LEN - sizeof(::file_hdr_t)); // set rest of buffer to 0
+ fs.seekp(0, std::fstream::beg);
+ fs.write(buff, buffsize);
+ std::streampos bytesWritten = fs.tellp();
+ if (std::streamoff(bytesWritten) != buffsize) {
+ oss << "ERROR: Unable to write file header of file \"" << emptyFileName << "\": tried to write "
+ << buffsize << " bytes; wrote " << bytesWritten << " bytes";
+ journalLogRef_.log(JournalLog::LOG_ERROR, oss.str());
+ fs.close();
+ return false;
+ }
+ oss << "WARNING: File " << emptyFileName << ": File header not reset";
+ journalLogRef_.log(JournalLog::LOG_WARN, oss.str());
+ }
+
+ // Close file
+ fs.close();
+ return true;
+}
+
+//static
+int EmptyFilePool::createSymLink(const std::string& fqFileName,
+ const std::string& fqLinkName) {
+ if(::symlink(fqFileName.c_str(), fqLinkName.c_str())) {
+ if (errno == EEXIST) return errno; // File name exists
+ std::ostringstream oss;
+ oss << "file=\"" << fqFileName << "\" symlink=\"" << fqLinkName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "createSymLink");
+ }
+ return 0;
+}
+
+//static
+std::string EmptyFilePool::deleteSymlink(const std::string& fqLinkName) {
+ char buff[1024];
+ ssize_t len = ::readlink(fqLinkName.c_str(), buff, 1024);
+ if (len < 0) {
+ std::ostringstream oss;
+ oss << "symlink=\"" << fqLinkName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "deleteSymlink");
+ }
+ ::unlink(fqLinkName.c_str());
+ return std::string(buff, len);
+}
+
+//static
+bool EmptyFilePool::isFile(const std::string& fqName) {
+ struct stat buff;
+ if (::lstat(fqName.c_str(), &buff)) {
+ std::ostringstream oss;
+ oss << "lstat file=\"" << fqName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_EFP_LSTAT, oss.str(), "EmptyFilePool", "isFile");
+ }
+ return S_ISREG(buff.st_mode);
+}
+
+//static
+bool EmptyFilePool::isSymlink(const std::string& fqName) {
+ struct stat buff;
+ if (::lstat(fqName.c_str(), &buff)) {
+ std::ostringstream oss;
+ oss << "lstat file=\"" << fqName << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_EFP_LSTAT, oss.str(), "EmptyFilePool", "isSymlink");
+ }
+ return S_ISLNK(buff.st_mode);
+
+}
+
+// static
+bool EmptyFilePool::moveFile(const std::string& from,
+ const std::string& to) {
+ if (::rename(from.c_str(), to.c_str())) {
+ if (errno == EEXIST) {
+ return false; // File name exists
+ }
+ std::ostringstream oss;
+ oss << "file=\"" << from << "\" dest=\"" << to << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "EmptyFilePool", "returnEmptyFile");
+ }
+ return true;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h
new file mode 100644
index 0000000000..dc567ff917
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h
@@ -0,0 +1,118 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_
+#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+ class EmptyFilePool;
+}}}
+
+#include <deque>
+#include "qpid/linearstore/journal/EmptyFilePoolTypes.h"
+#include "qpid/linearstore/journal/smutex.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+class EmptyFilePoolPartition;
+class jdir;
+class JournalFile;
+class JournalLog;
+
+class EmptyFilePool
+{
+protected:
+ typedef std::deque<std::string> emptyFileList_t;
+ typedef emptyFileList_t::const_iterator emptyFileListConstItr_t;
+
+ static std::string s_inuseFileDirectory_;
+ static std::string s_returnedFileDirectory_;
+
+ const std::string efpDirectory_;
+ const efpDataSize_kib_t efpDataSize_kib_;
+ const EmptyFilePoolPartition* partitionPtr_;
+ const bool overwriteBeforeReturnFlag_;
+ const bool truncateFlag_;
+ JournalLog& journalLogRef_;
+
+private:
+ emptyFileList_t emptyFileList_;
+ smutex emptyFileListMutex_;
+
+public:
+ EmptyFilePool(const std::string& efpDirectory,
+ const EmptyFilePoolPartition* partitionPtr,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef);
+ virtual ~EmptyFilePool();
+
+ void initialize();
+ efpDataSize_kib_t dataSize_kib() const;
+ efpFileSize_kib_t fileSize_kib() const;
+ efpDataSize_sblks_t dataSize_sblks() const;
+ efpFileSize_sblks_t fileSize_sblks() const;
+ efpFileCount_t numEmptyFiles() const;
+ efpDataSize_kib_t cumFileSize_kib() const;
+ efpPartitionNumber_t getPartitionNumber() const;
+ const EmptyFilePoolPartition* getPartition() const;
+ const efpIdentity_t getIdentity() const;
+
+ std::string takeEmptyFile(const std::string& destDirectory);
+ void returnEmptyFileSymlink(const std::string& emptyFileSymlink);
+
+ static std::string dirNameFromDataSize(const efpDataSize_kib_t efpDataSize_kib);
+ static efpDataSize_kib_t dataSizeFromDirName_kib(const std::string& dirName,
+ const efpPartitionNumber_t partitionNumber);
+
+protected:
+ void checkIosState(std::ofstream& ofs,
+ const uint32_t jerrno,
+ const std::string& fqFileName,
+ const std::string& operation,
+ const std::string& errorMessage,
+ const std::string& className,
+ const std::string& fnName);
+ std::string createEmptyFile();
+ std::string getEfpFileName();
+ void initializeSubDirectory(const std::string& fqDirName);
+ void overwriteFileContents(const std::string& fqFileName);
+ std::string popEmptyFile();
+ void pushEmptyFile(const std::string fqFileName);
+ void returnEmptyFile(const std::string& emptyFileName);
+ void resetEmptyFileHeader(const std::string& fqFileName);
+ bool validateEmptyFile(const std::string& emptyFileName) const;
+
+ static int createSymLink(const std::string& fqFileName,
+ const std::string& fqLinkName);
+ static std::string deleteSymlink(const std::string& fqLinkName);
+ static bool isFile(const std::string& fqName);
+ static bool isSymlink(const std::string& fqName);
+ static bool moveFile(const std::string& fromFqPath,
+ const std::string& toFqPath);
+};
+
+}}}
+
+#endif /* QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_ */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp
new file mode 100644
index 0000000000..a02679736e
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp
@@ -0,0 +1,211 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "EmptyFilePoolManager.h"
+
+#include "qpid/linearstore/journal/EmptyFilePool.h"
+#include "qpid/linearstore/journal/EmptyFilePoolPartition.h"
+#include "qpid/linearstore/journal/jdir.h"
+#include "qpid/linearstore/journal/JournalLog.h"
+#include "qpid/linearstore/journal/slock.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+EmptyFilePoolManager::EmptyFilePoolManager(const std::string& qlsStorePath,
+ const efpPartitionNumber_t defaultPartitionNumber,
+ const efpDataSize_kib_t defaultEfpDataSize_kib,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef) :
+ qlsStorePath_(qlsStorePath),
+ defaultPartitionNumber_(defaultPartitionNumber),
+ defaultEfpDataSize_kib_(defaultEfpDataSize_kib),
+ overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag),
+ truncateFlag_(truncateFlag),
+ journalLogRef_(journalLogRef)
+{}
+
+EmptyFilePoolManager::~EmptyFilePoolManager() {
+ slock l(partitionMapMutex_);
+ for (partitionMapItr_t i = partitionMap_.begin(); i != partitionMap_.end(); ++i) {
+ delete i->second;
+ }
+ partitionMap_.clear();
+}
+
+void EmptyFilePoolManager::findEfpPartitions() {
+//std::cout << "*** Reading " << qlsStorePath_ << std::endl; // DEBUG
+ bool foundPartition = false;
+ std::vector<std::string> dirList;
+ while (!foundPartition) {
+ jdir::read_dir(qlsStorePath_, dirList, true, false, true, false);
+ for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) {
+ efpPartitionNumber_t pn = EmptyFilePoolPartition::getPartitionNumber(*i);
+ if (pn > 0) { // valid partition name found
+ std::string fullDirPath(qlsStorePath_ + "/" + (*i));
+ EmptyFilePoolPartition* efppp = insertPartition(pn, fullDirPath);
+ if (efppp != 0)
+ efppp->findEmptyFilePools();
+ foundPartition = true;
+ }
+ }
+
+ // If no partition was found, create an empty default partition.
+ if (!foundPartition) {
+ std::ostringstream oss1;
+ oss1 << qlsStorePath_ << "/" << EmptyFilePoolPartition::getPartionDirectoryName(defaultPartitionNumber_)
+ << "/" << EmptyFilePoolPartition::s_efpTopLevelDir_
+ << "/" << EmptyFilePool::dirNameFromDataSize(defaultEfpDataSize_kib_);
+ jdir::create_dir(oss1.str());
+ insertPartition(defaultPartitionNumber_, oss1.str());
+ std::ostringstream oss2;
+ oss2 << "No EFP partition found, creating an empty partition at " << oss1.str();
+ journalLogRef_.log(JournalLog::LOG_INFO, oss2.str());
+ }
+ }
+
+ journalLogRef_.log(JournalLog::LOG_INFO, "EFP Manager initialization complete");
+ std::vector<qpid::linearstore::journal::EmptyFilePoolPartition*> partitionList;
+ getEfpPartitions(partitionList);
+ if (partitionList.size() == 0) {
+ journalLogRef_.log(JournalLog::LOG_WARN, "NO EFP PARTITIONS FOUND! No queue creation is possible.");
+ } else {
+ std::stringstream oss;
+ oss << "EFP Partitions found: " << partitionList.size();
+ journalLogRef_.log(JournalLog::LOG_INFO, oss.str());
+ for (std::vector<qpid::linearstore::journal::EmptyFilePoolPartition*>::const_iterator i=partitionList.begin(); i!= partitionList.end(); ++i) {
+ journalLogRef_.log(JournalLog::LOG_INFO, (*i)->toString(5U));
+ }
+ }
+}
+
+void EmptyFilePoolManager::getEfpFileSizes(std::vector<efpDataSize_kib_t>& efpFileSizeList,
+ const efpPartitionNumber_t efpPartitionNumber) const {
+ if (efpPartitionNumber == 0) {
+ for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) {
+ i->second->getEmptyFilePoolSizes_kib(efpFileSizeList);
+ }
+ } else {
+ partitionMapConstItr_t i = partitionMap_.find(efpPartitionNumber);
+ if (i != partitionMap_.end()) {
+ i->second->getEmptyFilePoolSizes_kib(efpFileSizeList);
+ }
+ }
+}
+
+EmptyFilePoolPartition* EmptyFilePoolManager::getEfpPartition(const efpPartitionNumber_t partitionNumber) {
+ partitionMapItr_t i = partitionMap_.find(partitionNumber);
+ if (i == partitionMap_.end())
+ return 0;
+ else
+ return i->second;
+}
+
+void EmptyFilePoolManager::getEfpPartitionNumbers(std::vector<efpPartitionNumber_t>& partitionNumberList,
+ const efpDataSize_kib_t efpDataSize_kib) const {
+ slock l(partitionMapMutex_);
+ for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) {
+ if (efpDataSize_kib == 0) {
+ partitionNumberList.push_back(i->first);
+ } else {
+ std::vector<efpDataSize_kib_t> efpFileSizeList;
+ i->second->getEmptyFilePoolSizes_kib(efpFileSizeList);
+ for (std::vector<efpDataSize_kib_t>::iterator j=efpFileSizeList.begin(); j!=efpFileSizeList.end(); ++j) {
+ if (*j == efpDataSize_kib) {
+ partitionNumberList.push_back(i->first);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void EmptyFilePoolManager::getEfpPartitions(std::vector<EmptyFilePoolPartition*>& partitionList,
+ const efpDataSize_kib_t efpDataSize_kib) {
+ slock l(partitionMapMutex_);
+ for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) {
+ if (efpDataSize_kib == 0) {
+ partitionList.push_back(i->second);
+ } else {
+ std::vector<efpDataSize_kib_t> efpFileSizeList;
+ i->second->getEmptyFilePoolSizes_kib(efpFileSizeList);
+ for (std::vector<efpDataSize_kib_t>::iterator j=efpFileSizeList.begin(); j!=efpFileSizeList.end(); ++j) {
+ if (*j == efpDataSize_kib) {
+ partitionList.push_back(i->second);
+ break;
+ }
+ }
+ }
+ }
+}
+
+EmptyFilePool* EmptyFilePoolManager::getEmptyFilePool(const efpIdentity_t efpIdentity) {
+ return getEmptyFilePool(efpIdentity.pn_, efpIdentity.ds_);
+}
+
+EmptyFilePool* EmptyFilePoolManager::getEmptyFilePool(const efpPartitionNumber_t partitionNumber,
+ const efpDataSize_kib_t efpDataSize_kib) {
+ EmptyFilePoolPartition* efppp = getEfpPartition(partitionNumber > 0 ? partitionNumber : defaultPartitionNumber_);
+ if (efppp == 0) {
+ return 0;
+ }
+ return efppp->getEmptyFilePool(efpDataSize_kib > 0 ? efpDataSize_kib : defaultEfpDataSize_kib_, true);
+}
+
+void EmptyFilePoolManager::getEmptyFilePools(std::vector<EmptyFilePool*>& emptyFilePoolList,
+ const efpPartitionNumber_t efpPartitionNumber) {
+ if (efpPartitionNumber == 0) {
+ for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) {
+ i->second->getEmptyFilePools(emptyFilePoolList);
+ }
+ } else {
+ partitionMapConstItr_t i = partitionMap_.find(efpPartitionNumber);
+ if (i != partitionMap_.end()) {
+ i->second->getEmptyFilePools(emptyFilePoolList);
+ }
+ }
+}
+
+uint16_t EmptyFilePoolManager::getNumEfpPartitions() const {
+ return partitionMap_.size();
+}
+
+EmptyFilePoolPartition* EmptyFilePoolManager::insertPartition(const efpPartitionNumber_t pn, const std::string& fullPartitionPath) {
+ EmptyFilePoolPartition* efppp = 0;
+ try {
+ efppp = new EmptyFilePoolPartition(pn, fullPartitionPath, overwriteBeforeReturnFlag_, truncateFlag_, journalLogRef_);
+ {
+ slock l(partitionMapMutex_);
+ partitionMap_[pn] = efppp;
+ }
+ } catch (const std::exception& e) {
+ if (efppp != 0) {
+ delete efppp;
+ efppp = 0;
+ }
+//std::cerr << "*** Unable to initialize partition " << pn << " (\'" << fullPartitionPath << "\'): " << e.what() << std::endl; // DEBUG
+ }
+ return efppp;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h
new file mode 100644
index 0000000000..d0aa7fa7d6
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_
+#define QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_
+
+#include <map>
+#include "qpid/linearstore/journal/EmptyFilePoolTypes.h"
+#include "qpid/linearstore/journal/smutex.h"
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class EmptyFilePool;
+class EmptyFilePoolPartition;
+class JournalLog;
+
+class EmptyFilePoolManager
+{
+protected:
+ typedef std::map<efpPartitionNumber_t, EmptyFilePoolPartition*> partitionMap_t;
+ typedef partitionMap_t::iterator partitionMapItr_t;
+ typedef partitionMap_t::const_iterator partitionMapConstItr_t;
+
+ const std::string qlsStorePath_;
+ const efpPartitionNumber_t defaultPartitionNumber_;
+ const efpDataSize_kib_t defaultEfpDataSize_kib_;
+ const bool overwriteBeforeReturnFlag_;
+ const bool truncateFlag_;
+ JournalLog& journalLogRef_;
+ partitionMap_t partitionMap_;
+ smutex partitionMapMutex_;
+
+public:
+ EmptyFilePoolManager(const std::string& qlsStorePath_,
+ const efpPartitionNumber_t defaultPartitionNumber,
+ const efpDataSize_kib_t defaultEfpDataSize_kib,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef_);
+ virtual ~EmptyFilePoolManager();
+
+ void findEfpPartitions();
+ void getEfpFileSizes(std::vector<efpDataSize_kib_t>& efpFileSizeList,
+ const efpPartitionNumber_t efpPartitionNumber = 0) const;
+ EmptyFilePoolPartition* getEfpPartition(const efpPartitionNumber_t partitionNumber);
+ void getEfpPartitionNumbers(std::vector<efpPartitionNumber_t>& partitionNumberList,
+ const efpDataSize_kib_t efpDataSize_kib = 0) const;
+ void getEfpPartitions(std::vector<EmptyFilePoolPartition*>& partitionList,
+ const efpDataSize_kib_t efpDataSize_kib = 0);
+ EmptyFilePool* getEmptyFilePool(const efpIdentity_t efpIdentity);
+ EmptyFilePool* getEmptyFilePool(const efpPartitionNumber_t partitionNumber,
+ const efpDataSize_kib_t efpDataSize_kib);
+ void getEmptyFilePools(std::vector<EmptyFilePool*>& emptyFilePoolList,
+ const efpPartitionNumber_t efpPartitionNumber = 0);
+ uint16_t getNumEfpPartitions() const;
+protected:
+ EmptyFilePoolPartition* insertPartition(const efpPartitionNumber_t pn, const std::string& fullPartitionPath);
+};
+
+}}}
+
+#endif /* QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_ */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp
new file mode 100644
index 0000000000..12d2db74b8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp
@@ -0,0 +1,199 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/EmptyFilePoolPartition.h"
+
+#include <iomanip>
+#include "qpid/linearstore/journal/EmptyFilePool.h"
+#include "qpid/linearstore/journal/jdir.h"
+#include "qpid/linearstore/journal/JournalLog.h"
+#include "qpid/linearstore/journal/slock.h"
+#include <unistd.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// static
+const std::string EmptyFilePoolPartition::s_efpTopLevelDir_("efp"); // Sets the top-level efp dir within a partition
+
+EmptyFilePoolPartition::EmptyFilePoolPartition(const efpPartitionNumber_t partitionNum,
+ const std::string& partitionDir,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef) :
+ partitionNum_(partitionNum),
+ partitionDir_(partitionDir),
+ overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag),
+ truncateFlag_(truncateFlag),
+ journalLogRef_(journalLogRef)
+{
+ validatePartitionDir();
+}
+
+EmptyFilePoolPartition::~EmptyFilePoolPartition() {
+ slock l(efpMapMutex_);
+ for (efpMapItr_t i = efpMap_.begin(); i != efpMap_.end(); ++i) {
+ delete i->second;
+ }
+ efpMap_.clear();
+}
+
+void
+EmptyFilePoolPartition::findEmptyFilePools() {
+//std::cout << "*** EmptyFilePoolPartition::findEmptyFilePools(): Reading " << partitionDir_ << std::endl; // DEBUG
+ std::string efpDir(partitionDir_ + "/" + s_efpTopLevelDir_);
+ if (jdir::is_dir(efpDir)) {
+ std::vector<std::string> dirList;
+ jdir::read_dir(efpDir, dirList, true, false, false, true);
+ for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) {
+ createEmptyFilePool(*i);
+ }
+ } else {
+ std::ostringstream oss;
+ oss << "Partition \"" << partitionDir_ << "\" does not contain top level EFP dir \"" << s_efpTopLevelDir_ << "\"";
+ journalLogRef_.log(JournalLog::LOG_WARN, oss.str());
+ }
+}
+
+EmptyFilePool* EmptyFilePoolPartition::getEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib, const bool createIfNonExistent) {
+ {
+ slock l(efpMapMutex_);
+ efpMapItr_t i = efpMap_.find(efpDataSize_kib);
+ if (i != efpMap_.end())
+ return i->second;
+ }
+ if (createIfNonExistent) {
+ return createEmptyFilePool(efpDataSize_kib);
+ }
+ return 0;
+}
+
+void EmptyFilePoolPartition::getEmptyFilePools(std::vector<EmptyFilePool*>& efpList) {
+ slock l(efpMapMutex_);
+ for (efpMapItr_t i=efpMap_.begin(); i!=efpMap_.end(); ++i) {
+ efpList.push_back(i->second);
+ }
+}
+
+void EmptyFilePoolPartition::getEmptyFilePoolSizes_kib(std::vector<efpDataSize_kib_t>& efpDataSizesList_kib) const {
+ slock l(efpMapMutex_);
+ for (efpMapConstItr_t i=efpMap_.begin(); i!=efpMap_.end(); ++i) {
+ efpDataSizesList_kib.push_back(i->first);
+ }
+}
+
+std::string EmptyFilePoolPartition::getPartitionDirectory() const {
+ return partitionDir_;
+}
+
+efpPartitionNumber_t EmptyFilePoolPartition::getPartitionNumber() const {
+ return partitionNum_;
+}
+
+std::string EmptyFilePoolPartition::toString(const uint16_t indent) const {
+ std::string indentStr(indent, ' ');
+ std::stringstream oss;
+ oss << "EFP Partition " << partitionNum_ << ":" << std::endl;
+ oss << indentStr << "EFP Partition Analysis (partition " << partitionNum_ << " at \"" << partitionDir_ << "\"):" << std::endl;
+ if (efpMap_.empty()) {
+ oss << indentStr << "<Partition empty, no EFPs found>" << std::endl;
+ } else {
+ oss << indentStr << std::setw(12) << "efp_size_kib"
+ << std::setw(12) << "num_files"
+ << std::setw(18) << "tot_capacity_kib" << std::endl;
+ oss << indentStr << std::setw(12) << "------------"
+ << std::setw(12) << "----------"
+ << std::setw(18) << "----------------" << std::endl;
+ {
+ slock l(efpMapMutex_);
+ for (efpMapConstItr_t i=efpMap_.begin(); i!= efpMap_.end(); ++i) {
+ oss << indentStr << std::setw(12) << i->first
+ << std::setw(12) << i->second->numEmptyFiles()
+ << std::setw(18) << i->second->cumFileSize_kib() << std::endl;
+ }
+ }
+ }
+ return oss.str();
+}
+
+// static
+std::string EmptyFilePoolPartition::getPartionDirectoryName(const efpPartitionNumber_t partitionNumber) {
+ std::ostringstream oss;
+ oss << "p" << std::setfill('0') << std::setw(3) << partitionNumber;
+ return oss.str();
+}
+
+//static
+efpPartitionNumber_t EmptyFilePoolPartition::getPartitionNumber(const std::string& name) {
+ if (name.length() == 4 && name[0] == 'p' && ::isdigit(name[1]) && ::isdigit(name[2]) && ::isdigit(name[3])) {
+ long pn = ::strtol(name.c_str() + 1, 0, 10);
+ if (pn == 0 && errno) {
+ return 0;
+ } else {
+ return (efpPartitionNumber_t)pn;
+ }
+ }
+ return 0;
+}
+
+// --- protected functions ---
+
+EmptyFilePool* EmptyFilePoolPartition::createEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib) {
+ std::string fqEfpDirectoryName(partitionDir_ + "/" + EmptyFilePoolPartition::s_efpTopLevelDir_ + "/" + EmptyFilePool::dirNameFromDataSize(efpDataSize_kib));
+ return createEmptyFilePool(fqEfpDirectoryName);
+}
+
+EmptyFilePool* EmptyFilePoolPartition::createEmptyFilePool(const std::string fqEfpDirectoryName) {
+ EmptyFilePool* efpp = 0;
+ try {
+ efpp = new EmptyFilePool(fqEfpDirectoryName, this, overwriteBeforeReturnFlag_, truncateFlag_, journalLogRef_);
+ {
+ slock l(efpMapMutex_);
+ efpMap_[efpp->dataSize_kib()] = efpp;
+ }
+ }
+ catch (const std::exception& e) {
+ if (efpp != 0) {
+ delete efpp;
+ efpp = 0;
+ }
+ std::ostringstream oss;
+ oss << "EmptyFilePool create failed: " << e.what();
+ journalLogRef_.log(JournalLog::LOG_WARN, oss.str());
+ }
+ if (efpp != 0) {
+ efpp->initialize();
+ }
+ return efpp;
+}
+
+void EmptyFilePoolPartition::validatePartitionDir() {
+ std::ostringstream ss;
+ if (!jdir::is_dir(partitionDir_)) {
+ ss << "Invalid partition directory: \'" << partitionDir_ << "\' is not a directory";
+ throw jexception(jerrno::JERR_EFP_BADPARTITIONDIR, ss.str(), "EmptyFilePoolPartition", "validatePartitionDir");
+ }
+
+ // TODO: other validity checks here
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h
new file mode 100644
index 0000000000..570e2b073f
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h
@@ -0,0 +1,82 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_
+#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_
+
+#include <map>
+#include "qpid/linearstore/journal/EmptyFilePoolTypes.h"
+#include "qpid/linearstore/journal/smutex.h"
+#include <string>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class EmptyFilePool;
+class JournalLog;
+
+class EmptyFilePoolPartition
+{
+public:
+ static const std::string s_efpTopLevelDir_;
+protected:
+ typedef std::map<efpDataSize_kib_t, EmptyFilePool*> efpMap_t;
+ typedef efpMap_t::iterator efpMapItr_t;
+ typedef efpMap_t::const_iterator efpMapConstItr_t;
+
+ const efpPartitionNumber_t partitionNum_;
+ const std::string partitionDir_;
+ const bool overwriteBeforeReturnFlag_;
+ const bool truncateFlag_;
+ JournalLog& journalLogRef_;
+ efpMap_t efpMap_;
+ smutex efpMapMutex_;
+
+public:
+ EmptyFilePoolPartition(const efpPartitionNumber_t partitionNum,
+ const std::string& partitionDir,
+ const bool overwriteBeforeReturnFlag,
+ const bool truncateFlag,
+ JournalLog& journalLogRef);
+ virtual ~EmptyFilePoolPartition();
+
+ void findEmptyFilePools();
+ EmptyFilePool* getEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib, const bool createIfNonExistent);
+ void getEmptyFilePools(std::vector<EmptyFilePool*>& efpList);
+ void getEmptyFilePoolSizes_kib(std::vector<efpDataSize_kib_t>& efpDataSizesList) const;
+ std::string getPartitionDirectory() const;
+ efpPartitionNumber_t getPartitionNumber() const;
+ std::string toString(const uint16_t indent) const;
+
+ static std::string getPartionDirectoryName(const efpPartitionNumber_t partitionNumber);
+ static efpPartitionNumber_t getPartitionNumber(const std::string& name);
+
+protected:
+ EmptyFilePool* createEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib);
+ EmptyFilePool* createEmptyFilePool(const std::string fqEfpDirectoryName);
+ void validatePartitionDir();
+};
+
+}}}
+
+#endif /* QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_ */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h
new file mode 100644
index 0000000000..4cae4e6538
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h
@@ -0,0 +1,57 @@
+ /*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_
+#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_
+
+#include <iostream>
+#include <sstream>
+#include <stdint.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+typedef uint64_t efpDataSize_kib_t; ///< Size of data part of file (excluding file header) in kib
+typedef uint64_t efpFileSize_kib_t; ///< Size of file (header + data) in kib
+typedef uint32_t efpDataSize_sblks_t; ///< Size of data part of file (excluding file header) in sblks
+typedef uint32_t efpFileSize_sblks_t; ///< Size of file (header + data) in sblks
+typedef uint32_t efpFileCount_t; ///< Number of files in a partition or pool
+typedef uint16_t efpPartitionNumber_t; ///< Number assigned to a partition
+
+typedef struct efpIdentity_t {
+ efpPartitionNumber_t pn_;
+ efpDataSize_kib_t ds_;
+ efpIdentity_t() : pn_(0), ds_(0) {}
+ efpIdentity_t(efpPartitionNumber_t pn, efpDataSize_kib_t ds) : pn_(pn), ds_(ds) {}
+ efpIdentity_t(const efpIdentity_t& ei) : pn_(ei.pn_), ds_(ei.ds_) {}
+ friend std::ostream& operator<<(std::ostream& os, const efpIdentity_t& id) {
+ // This two-stage write allows this << operator to be used with std::setw() for formatted writes
+ std::ostringstream oss;
+ oss << id.pn_ << "," << id.ds_;
+ os << oss.str();
+ return os;
+ }
+} efpIdentity_t;
+
+}}}
+
+#endif /* QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_ */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalFile.cpp b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.cpp
new file mode 100644
index 0000000000..ed03a8413f
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.cpp
@@ -0,0 +1,349 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/JournalFile.h"
+
+#include <fcntl.h>
+#include "qpid/linearstore/journal/jcfg.h"
+#include "qpid/linearstore/journal/pmgr.h"
+#include "qpid/linearstore/journal/utils/file_hdr.h"
+#include <unistd.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+JournalFile::JournalFile(const std::string& fqFileName,
+ const efpIdentity_t& efpIdentity,
+ const uint64_t fileSeqNum,
+ const std::string queueName) :
+ efpIdentity_(efpIdentity),
+ fqFileName_(fqFileName),
+ fileSeqNum_(fileSeqNum),
+ queueName_(queueName),
+ serial_(getRandom64()),
+ firstRecordOffset_(0ULL),
+ fileHandle_(-1),
+ fileCloseFlag_(false),
+ fileHeaderBasePtr_ (0),
+ fileHeaderPtr_(0),
+ aioControlBlockPtr_(0),
+ fileSize_dblks_(((efpIdentity.ds_ * 1024) + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES)) / QLS_DBLK_SIZE_BYTES),
+ initializedFlag_(false),
+ enqueuedRecordCount_("JournalFile::enqueuedRecordCount", 0),
+ submittedDblkCount_("JournalFile::submittedDblkCount", 0),
+ completedDblkCount_("JournalFile::completedDblkCount", 0),
+ outstandingAioOpsCount_("JournalFile::outstandingAioOpsCount", 0)
+{}
+
+JournalFile::JournalFile(const std::string& fqFileName,
+ const ::file_hdr_t& fileHeader,
+ const std::string queueName) :
+ efpIdentity_(fileHeader._efp_partition, fileHeader._data_size_kib),
+ fqFileName_(fqFileName),
+ fileSeqNum_(fileHeader._file_number),
+ queueName_(queueName),
+ serial_(fileHeader._rhdr._serial),
+ firstRecordOffset_(fileHeader._fro),
+ fileHandle_(-1),
+ fileCloseFlag_(false),
+ fileHeaderBasePtr_ (0),
+ fileHeaderPtr_(0),
+ aioControlBlockPtr_(0),
+ fileSize_dblks_(((fileHeader._data_size_kib * 1024) + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES)) / QLS_DBLK_SIZE_BYTES),
+ initializedFlag_(false),
+ enqueuedRecordCount_("JournalFile::enqueuedRecordCount", 0),
+ submittedDblkCount_("JournalFile::submittedDblkCount", 0),
+ completedDblkCount_("JournalFile::completedDblkCount", 0),
+ outstandingAioOpsCount_("JournalFile::outstandingAioOpsCount", 0)
+{}
+
+JournalFile::~JournalFile() {
+ finalize();
+}
+
+void
+JournalFile::initialize(const uint32_t completedDblkCount) {
+ if (!initializedFlag_) {
+ if (::posix_memalign(&fileHeaderBasePtr_, QLS_AIO_ALIGN_BOUNDARY_BYTES, QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024))
+ {
+ std::ostringstream oss;
+ oss << "posix_memalign(): blksize=" << QLS_AIO_ALIGN_BOUNDARY_BYTES << " size=" << (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024);
+ oss << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__MALLOC, oss.str(), "JournalFile", "initialize");
+ }
+ fileHeaderPtr_ = (::file_hdr_t*)fileHeaderBasePtr_;
+ aioControlBlockPtr_ = new aio_cb;
+ initializedFlag_ = true;
+ }
+ if (completedDblkCount > 0UL) {
+ submittedDblkCount_.set(completedDblkCount);
+ completedDblkCount_.set(completedDblkCount);
+ }
+}
+
+void
+JournalFile::finalize() {
+ if (fileHeaderBasePtr_ != 0) {
+ std::free(fileHeaderBasePtr_);
+ fileHeaderBasePtr_ = 0;
+ fileHeaderPtr_ = 0;
+ }
+ if (aioControlBlockPtr_ != 0) {
+ delete(aioControlBlockPtr_);
+ aioControlBlockPtr_ = 0;
+ }
+}
+
+const std::string JournalFile::getFqFileName() const {
+ return fqFileName_;
+}
+
+uint64_t JournalFile::getFileSeqNum() const {
+ return fileSeqNum_;
+}
+
+uint64_t JournalFile::getSerial() const {
+ return serial_;
+}
+
+int JournalFile::open() {
+ fileHandle_ = ::open(fqFileName_.c_str(), O_WRONLY | O_DIRECT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); // 0644 -rw-r--r--
+ if (fileHandle_ < 0) {
+ std::ostringstream oss;
+ oss << "file=\"" << fqFileName_ << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JNLF_OPEN, oss.str(), "JournalFile", "open");
+ }
+ return fileHandle_;
+}
+
+void JournalFile::close() {
+ if (fileHandle_ >= 0) {
+ if (getOutstandingAioDblks()) {
+ fileCloseFlag_ = true; // Close later when all outstanding AIOs have returned
+ } else {
+ int res = ::close(fileHandle_);
+ fileHandle_ = -1;
+ if (res != 0) {
+ std::ostringstream oss;
+ oss << "file=\"" << fqFileName_ << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JNLF_CLOSE, oss.str(), "JournalFile", "open");
+ }
+ }
+ }
+}
+
+void JournalFile::asyncFileHeaderWrite(io_context_t ioContextPtr,
+ const efpPartitionNumber_t efpPartitionNumber,
+ const efpDataSize_kib_t efpDataSize_kib,
+ const uint16_t userFlags,
+ const uint64_t recordId,
+ const uint64_t firstRecordOffset) {
+ firstRecordOffset_ = firstRecordOffset;
+ ::file_hdr_create(fileHeaderPtr_, QLS_FILE_MAGIC, QLS_JRNL_VERSION, QLS_JRNL_FHDR_RES_SIZE_SBLKS, efpPartitionNumber, efpDataSize_kib);
+ ::file_hdr_init(fileHeaderBasePtr_,
+ QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024,
+ userFlags,
+ serial_,
+ recordId,
+ firstRecordOffset,
+ fileSeqNum_,
+ queueName_.size(),
+ queueName_.data());
+ const std::size_t wr_size = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024;
+ if (!isOpen()) open();
+ aio::prep_pwrite(aioControlBlockPtr_, fileHandle_, (void*)fileHeaderBasePtr_, wr_size, 0UL);
+ if (!aio::is_aligned(aioControlBlockPtr_->u.c.buf, QLS_AIO_ALIGN_BOUNDARY_BYTES)) {
+ std::ostringstream oss;
+ oss << "AIO operation on misaligned buffer: iocb->u.c.buf=" << aioControlBlockPtr_->u.c.buf << std::endl;
+ throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncFileHeaderWrite");
+ }
+ if (aio::submit(ioContextPtr, 1, &aioControlBlockPtr_) < 0) {
+ std::ostringstream oss;
+ oss << "queue=\"" << queueName_ << "\" fid=0x" << std::hex << fileSeqNum_ << " wr_size=0x" << wr_size << " foffs=0x0";
+ throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncFileHeaderWrite");
+ }
+ addSubmittedDblkCount(QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS);
+ incrOutstandingAioOperationCount();
+}
+
+void JournalFile::asyncPageWrite(io_context_t ioContextPtr,
+ aio_cb* aioControlBlockPtr,
+ void* data,
+ uint32_t dataSize_dblks) {
+ const std::size_t wr_size = dataSize_dblks * QLS_DBLK_SIZE_BYTES;
+ const uint64_t foffs = submittedDblkCount_.get() * QLS_DBLK_SIZE_BYTES;
+ if (!isOpen()) open();
+ aio::prep_pwrite_2(aioControlBlockPtr, fileHandle_, data, wr_size, foffs);
+ if (!aio::is_aligned(aioControlBlockPtr->u.c.buf, QLS_AIO_ALIGN_BOUNDARY_BYTES)) {
+ std::ostringstream oss;
+ oss << "AIO operation on misaligned buffer: iocb->u.c.buf=" << aioControlBlockPtr->u.c.buf << std::endl;
+ throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncPageWrite");
+ }
+ pmgr::page_cb* pcbp = (pmgr::page_cb*)(aioControlBlockPtr->data); // This page's control block (pcb)
+ pcbp->_wdblks = dataSize_dblks;
+ pcbp->_jfp = this;
+ if (aio::submit(ioContextPtr, 1, &aioControlBlockPtr) < 0) {
+ std::ostringstream oss;
+ oss << "queue=\"" << queueName_ << "\" fid=0x" << std::hex << fileSeqNum_ << " wr_size=0x" << wr_size << " foffs=0x" << foffs;
+ throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncPageWrite");
+ }
+ addSubmittedDblkCount(dataSize_dblks);
+ incrOutstandingAioOperationCount();
+}
+
+uint32_t JournalFile::getEnqueuedRecordCount() const {
+ return enqueuedRecordCount_.get();
+}
+
+uint32_t JournalFile::incrEnqueuedRecordCount() {
+ return enqueuedRecordCount_.increment();
+}
+
+uint32_t JournalFile::decrEnqueuedRecordCount() {
+ return enqueuedRecordCount_.decrementLimit();
+}
+
+uint32_t JournalFile::addCompletedDblkCount(const uint32_t a) {
+ return completedDblkCount_.addLimit(a, submittedDblkCount_.get(), jerrno::JERR_JNLF_CMPLOFFSOVFL);
+}
+
+uint16_t JournalFile::getOutstandingAioOperationCount() const {
+ return outstandingAioOpsCount_.get();
+}
+
+uint16_t JournalFile::decrOutstandingAioOperationCount() {
+ uint16_t r = outstandingAioOpsCount_.decrementLimit();
+ if (fileCloseFlag_ && outstandingAioOpsCount_ == 0) { // Delayed close
+ close();
+ }
+ return r;
+}
+
+efpIdentity_t JournalFile::getEfpIdentity() const {
+ return efpIdentity_;
+}
+
+uint64_t JournalFile::getFirstRecordOffset() const {
+ return firstRecordOffset_;
+}
+
+void JournalFile::setFirstRecordOffset(const uint64_t firstRecordOffset) {
+ firstRecordOffset_ = firstRecordOffset;
+}
+
+// --- Status helper functions ---
+
+bool JournalFile::isEmpty() const {
+ return submittedDblkCount_ == 0;
+}
+
+bool JournalFile::isNoEnqueuedRecordsRemaining() const {
+ return /*!enqueueStarted_ &&*/ // Not part-way through encoding an enqueue
+ isFullAndComplete() && // Full with all AIO returned
+ enqueuedRecordCount_ == 0; // No remaining enqueued records
+}
+
+// debug aid
+const std::string JournalFile::status_str(const uint8_t indentDepth) const {
+ std::string indent((size_t)indentDepth, '.');
+ std::ostringstream oss;
+ oss << indent << "JournalFile: fileName=" << getFileName() << std::endl;
+ oss << indent << " directory=" << getDirectory() << std::endl;
+ oss << indent << " fileSizeDblks=" << fileSize_dblks_ << std::endl;
+ oss << indent << " open=" << (isOpen() ? "T" : "F") << std::endl;
+ oss << indent << " fileHandle=" << fileHandle_ << std::endl;
+ oss << indent << " enqueuedRecordCount=" << getEnqueuedRecordCount() << std::endl;
+ oss << indent << " submittedDblkCount=" << getSubmittedDblkCount() << std::endl;
+ oss << indent << " completedDblkCount=" << getCompletedDblkCount() << std::endl;
+ oss << indent << " outstandingAioOpsCount=" << getOutstandingAioOperationCount() << std::endl;
+ oss << indent << " isEmpty()=" << (isEmpty() ? "T" : "F") << std::endl;
+ oss << indent << " isDataEmpty()=" << (isDataEmpty() ? "T" : "F") << std::endl;
+ oss << indent << " dblksRemaining()=" << dblksRemaining() << std::endl;
+ oss << indent << " isFull()=" << (isFull() ? "T" : "F") << std::endl;
+ oss << indent << " isFullAndComplete()=" << (isFullAndComplete() ? "T" : "F") << std::endl;
+ oss << indent << " getOutstandingAioDblks()=" << getOutstandingAioDblks() << std::endl;
+ oss << indent << " getNextFile()=" << (getNextFile() ? "T" : "F") << std::endl;
+ return oss.str();
+}
+
+// --- protected functions ---
+
+const std::string JournalFile::getDirectory() const {
+ return fqFileName_.substr(0, fqFileName_.rfind('/'));
+}
+
+const std::string JournalFile::getFileName() const {
+ return fqFileName_.substr(fqFileName_.rfind('/')+1);
+}
+
+//static
+uint64_t JournalFile::getRandom64() {
+ // TODO: ::rand() is not thread safe, either lock or use rand_r(seed) with a thread-local seed.
+ return ((uint64_t)::rand() << QLS_RAND_SHIFT1) | ((uint64_t)::rand() << QLS_RAND_SHIFT2) | (::rand() & QLS_RAND_MASK);
+}
+
+bool JournalFile::isOpen() const {
+ return fileHandle_ >= 0;
+}
+
+uint32_t JournalFile::getSubmittedDblkCount() const {
+ return submittedDblkCount_.get();
+}
+
+uint32_t JournalFile::addSubmittedDblkCount(const uint32_t a) {
+ return submittedDblkCount_.addLimit(a, fileSize_dblks_, jerrno::JERR_JNLF_FILEOFFSOVFL);
+}
+
+uint32_t JournalFile::getCompletedDblkCount() const {
+ return completedDblkCount_.get();
+}
+
+uint16_t JournalFile::incrOutstandingAioOperationCount() {
+ return outstandingAioOpsCount_.increment();
+}
+
+u_int32_t JournalFile::dblksRemaining() const {
+ return fileSize_dblks_ - submittedDblkCount_;
+}
+
+bool JournalFile::getNextFile() const {
+ return isFull();
+}
+
+u_int32_t JournalFile::getOutstandingAioDblks() const {
+ return submittedDblkCount_ - completedDblkCount_;
+}
+
+bool JournalFile::isDataEmpty() const {
+ return submittedDblkCount_ <= QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS;
+}
+
+bool JournalFile::isFull() const {
+ return submittedDblkCount_ == fileSize_dblks_;
+}
+
+bool JournalFile::isFullAndComplete() const {
+ return completedDblkCount_ == fileSize_dblks_;
+}
+
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h
new file mode 100644
index 0000000000..e33830ef7f
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h
@@ -0,0 +1,132 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_
+#define QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_
+
+#include "qpid/linearstore/journal/aio.h"
+#include "qpid/linearstore/journal/AtomicCounter.h"
+#include "qpid/linearstore/journal/EmptyFilePoolTypes.h"
+
+class file_hdr_t;
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class JournalFile
+{
+protected:
+ const efpIdentity_t efpIdentity_;
+ const std::string fqFileName_;
+ const uint64_t fileSeqNum_;
+ const std::string queueName_;
+ const uint64_t serial_;
+ uint64_t firstRecordOffset_;
+ int fileHandle_;
+ bool fileCloseFlag_;
+ void* fileHeaderBasePtr_;
+ ::file_hdr_t* fileHeaderPtr_;
+ aio_cb* aioControlBlockPtr_;
+ uint32_t fileSize_dblks_; ///< File size in data blocks, including file header
+ bool initializedFlag_;
+
+ AtomicCounter<uint32_t> enqueuedRecordCount_; ///< Count of enqueued records
+ AtomicCounter<uint32_t> submittedDblkCount_; ///< Write file count (data blocks) for submitted AIO
+ AtomicCounter<uint32_t> completedDblkCount_; ///< Write file count (data blocks) for completed AIO
+ AtomicCounter<uint16_t> outstandingAioOpsCount_; ///< Outstanding AIO operations on this file
+
+public:
+ // Constructor for creating new file with known fileSeqNum and random serial
+ JournalFile(const std::string& fqFileName,
+ const efpIdentity_t& efpIdentity,
+ const uint64_t fileSeqNum,
+ const std::string queueName);
+ // Constructor for recovery in which fileSeqNum and serial are recovered from fileHeader param
+ JournalFile(const std::string& fqFileName,
+ const ::file_hdr_t& fileHeader,
+ const std::string queueName);
+ virtual ~JournalFile();
+
+ void initialize(const uint32_t completedDblkCount);
+ void finalize();
+
+ const std::string getFqFileName() const;
+ uint64_t getFileSeqNum() const;
+ uint64_t getSerial() const;
+
+ int open();
+ void close();
+ void asyncFileHeaderWrite(io_context_t ioContextPtr,
+ const efpPartitionNumber_t efpPartitionNumber,
+ const efpDataSize_kib_t efpDataSize_kib,
+ const uint16_t userFlags,
+ const uint64_t recordId,
+ const uint64_t firstRecordOffset);
+ void asyncPageWrite(io_context_t ioContextPtr,
+ aio_cb* aioControlBlockPtr,
+ void* data,
+ uint32_t dataSize_dblks);
+
+ uint32_t getSubmittedDblkCount() const;
+ uint32_t getEnqueuedRecordCount() const;
+ uint32_t incrEnqueuedRecordCount();
+ uint32_t decrEnqueuedRecordCount();
+
+ uint32_t addCompletedDblkCount(const uint32_t a);
+
+ uint16_t getOutstandingAioOperationCount() const;
+ uint16_t decrOutstandingAioOperationCount();
+
+ efpIdentity_t getEfpIdentity() const;
+ uint64_t getFirstRecordOffset() const;
+ void setFirstRecordOffset(const uint64_t firstRecordOffset);
+
+ // Status helper functions
+ bool isEmpty() const; ///< True if no writes of any kind have occurred
+ bool isNoEnqueuedRecordsRemaining() const; ///< True when all enqueued records (or parts) have been dequeued
+
+ // debug aid
+ const std::string status_str(const uint8_t indentDepth) const;
+
+protected:
+ const std::string getDirectory() const;
+ const std::string getFileName() const;
+ static uint64_t getRandom64();
+ bool isOpen() const;
+
+ uint32_t addSubmittedDblkCount(const uint32_t a);
+
+ uint32_t getCompletedDblkCount() const;
+
+ uint16_t incrOutstandingAioOperationCount();
+
+ u_int32_t dblksRemaining() const; ///< Dblks remaining until full
+ bool getNextFile() const; ///< True when next file is needed
+ u_int32_t getOutstandingAioDblks() const; ///< Dblks still to be written
+ bool isDataEmpty() const; ///< True if only file header written, data is still empty
+ bool isFull() const; ///< True if all possible dblks have been submitted (but may not yet have returned from AIO)
+ bool isFullAndComplete() const; ///< True if all submitted dblks have returned from AIO
+};
+
+}}}
+
+#endif // QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp
new file mode 100644
index 0000000000..c35ec97e91
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp
@@ -0,0 +1,63 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/JournalLog.h"
+
+#include <iostream>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+JournalLog::JournalLog(log_level_t logLevelThreshold) : logLevelThreshold_(logLevelThreshold) {}
+
+JournalLog::~JournalLog() {}
+
+void JournalLog::log(const log_level_t logLevel,
+ const std::string& logStatement) const {
+ if (logLevel >= logLevelThreshold_) {
+ std::cerr << log_level_str(logLevel) << ": " << logStatement << std::endl;
+ }
+}
+
+void JournalLog::log(log_level_t logLevel,
+ const std::string& journalId,
+ const std::string& logStatement) const {
+ if (logLevel >= logLevelThreshold_) {
+ std::cerr << log_level_str(logLevel) << ": Journal \"" << journalId << "\": " << logStatement << std::endl;
+ }
+}
+
+const char* JournalLog::log_level_str(log_level_t logLevel) {
+ switch (logLevel)
+ {
+ case LOG_TRACE: return "TRACE";
+ case LOG_DEBUG: return "DEBUG";
+ case LOG_INFO: return "INFO";
+ case LOG_NOTICE: return "NOTICE";
+ case LOG_WARN: return "WARN";
+ case LOG_ERROR: return "ERROR";
+ case LOG_CRITICAL: return "CRITICAL";
+ }
+ return "<log level unknown>";
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h
new file mode 100644
index 0000000000..cf503cb9d2
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_
+#define QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_
+
+#include <string>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class JournalLog
+{
+public:
+ typedef enum _log_level {
+ LOG_TRACE = 0,
+ LOG_DEBUG,
+ LOG_INFO,
+ LOG_NOTICE,
+ LOG_WARN,
+ LOG_ERROR,
+ LOG_CRITICAL
+ } log_level_t;
+
+protected:
+ const log_level_t logLevelThreshold_;
+
+public:
+ JournalLog(log_level_t logLevelThreshold);
+ virtual ~JournalLog();
+ virtual void log(const log_level_t logLevel,
+ const std::string& logStatement) const;
+ virtual void log(const log_level_t logLevel,
+ const std::string& journalId,
+ const std::string& logStatement) const;
+ static const char* log_level_str(const log_level_t logLevel);
+};
+
+}}}
+
+#endif // QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp
new file mode 100644
index 0000000000..08d565ca2e
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp
@@ -0,0 +1,243 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/LinearFileController.h"
+
+#include "qpid/linearstore/journal/EmptyFilePool.h"
+#include "qpid/linearstore/journal/jcntl.h"
+#include "qpid/linearstore/journal/JournalFile.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+LinearFileController::LinearFileController(jcntl& jcntlRef) :
+ jcntlRef_(jcntlRef),
+ emptyFilePoolPtr_(0),
+ fileSeqCounter_("LinearFileController::fileSeqCounter", 0),
+ recordIdCounter_("LinearFileController::recordIdCounter", 0),
+ decrCounter_("LinearFileController::decrCounter", 0),
+ currentJournalFilePtr_(0)
+{}
+
+LinearFileController::~LinearFileController() {}
+
+void LinearFileController::initialize(const std::string& journalDirectory,
+ EmptyFilePool* emptyFilePoolPtr,
+ uint64_t initialFileNumberVal) {
+ journalDirectory_.assign(journalDirectory);
+ emptyFilePoolPtr_ = emptyFilePoolPtr;
+ fileSeqCounter_.set(initialFileNumberVal);
+}
+
+void LinearFileController::finalize() {
+ if (currentJournalFilePtr_) {
+ currentJournalFilePtr_->close();
+ currentJournalFilePtr_ = 0;
+ }
+ while (!journalFileList_.empty()) {
+ delete journalFileList_.front();
+ journalFileList_.pop_front();
+ }
+}
+
+void LinearFileController::addJournalFile(JournalFile* journalFilePtr,
+ const uint32_t completedDblkCount,
+ const bool makeCurrentFlag) {
+ if (makeCurrentFlag && currentJournalFilePtr_) {
+ currentJournalFilePtr_->close();
+ currentJournalFilePtr_ = 0;
+ }
+ journalFilePtr->initialize(completedDblkCount);
+ {
+ slock l(journalFileListMutex_);
+ journalFileList_.push_back(journalFilePtr);
+ }
+ if (makeCurrentFlag) {
+ currentJournalFilePtr_ = journalFilePtr;
+ }
+}
+
+efpDataSize_sblks_t LinearFileController::dataSize_sblks() const {
+ return emptyFilePoolPtr_->dataSize_sblks();
+}
+
+efpFileSize_sblks_t LinearFileController::fileSize_sblks() const {
+ return emptyFilePoolPtr_->fileSize_sblks();
+}
+
+void LinearFileController::getNextJournalFile() {
+ if (currentJournalFilePtr_)
+ currentJournalFilePtr_->close();
+ pullEmptyFileFromEfp();
+}
+
+uint64_t LinearFileController::getNextRecordId() {
+ return recordIdCounter_.increment();
+}
+
+void LinearFileController::removeFileToEfp(const std::string& fileName) {
+ if (emptyFilePoolPtr_) {
+ emptyFilePoolPtr_->returnEmptyFileSymlink(fileName);
+ }
+}
+
+void LinearFileController::restoreEmptyFile(const std::string& fileName) {
+ // TODO: Add checks that this file is of a valid size; if not, delete this and get one from the EFP
+ addJournalFile(fileName, emptyFilePoolPtr_->getIdentity(), getNextFileSeqNum(), 0);
+}
+
+void LinearFileController::purgeEmptyFilesToEfp() {
+ slock l(journalFileListMutex_);
+ while (journalFileList_.front()->isNoEnqueuedRecordsRemaining() && journalFileList_.size() > 1) { // Can't purge last file, even if it has no enqueued records
+ emptyFilePoolPtr_->returnEmptyFileSymlink(journalFileList_.front()->getFqFileName());
+ delete journalFileList_.front();
+ journalFileList_.pop_front();
+ }
+}
+
+uint32_t LinearFileController::getEnqueuedRecordCount(const uint64_t fileSeqNumber) {
+ return find(fileSeqNumber)->getEnqueuedRecordCount();
+}
+
+uint32_t LinearFileController::incrEnqueuedRecordCount(const uint64_t fileSeqNumber) {
+ return find(fileSeqNumber)->incrEnqueuedRecordCount();
+}
+
+uint32_t LinearFileController::decrEnqueuedRecordCount(const uint64_t fileSeqNumber) {
+ uint32_t r = find(fileSeqNumber)->decrEnqueuedRecordCount();
+
+ // TODO: Re-evaluate after testing and profiling
+ // This is the first go at implementing auto-purge, which checks for all trailing empty files and recycles
+ // them back to the EFP. This version checks every 100 decrements using decrCounter_ (an action which releases
+ // records). We need to check this rather simple scheme works for outlying scenarios (large and tiny data
+ // records) without impacting performance or performing badly (leaving excessive empty files in the journals).
+ if (decrCounter_.increment() % 100ULL == 0ULL) {
+ purgeEmptyFilesToEfp();
+ }
+ return r;
+}
+
+uint32_t LinearFileController::addWriteCompletedDblkCount(const uint64_t fileSeqNumber, const uint32_t a) {
+ return find(fileSeqNumber)->addCompletedDblkCount(a);
+}
+
+uint16_t LinearFileController::decrOutstandingAioOperationCount(const uint64_t fileSeqNumber) {
+ return find(fileSeqNumber)->decrOutstandingAioOperationCount();
+}
+
+void LinearFileController::asyncFileHeaderWrite(io_context_t ioContextPtr,
+ const uint16_t userFlags,
+ const uint64_t recordId,
+ const uint64_t firstRecordOffset) {
+ currentJournalFilePtr_->asyncFileHeaderWrite(ioContextPtr,
+ emptyFilePoolPtr_->getPartitionNumber(),
+ emptyFilePoolPtr_->dataSize_kib(),
+ userFlags,
+ recordId,
+ firstRecordOffset);
+}
+
+void LinearFileController::asyncPageWrite(io_context_t ioContextPtr,
+ aio_cb* aioControlBlockPtr,
+ void* data,
+ uint32_t dataSize_dblks) {
+ assertCurrentJournalFileValid("asyncPageWrite");
+ currentJournalFilePtr_->asyncPageWrite(ioContextPtr, aioControlBlockPtr, data, dataSize_dblks);
+}
+
+uint64_t LinearFileController::getCurrentFileSeqNum() const {
+ assertCurrentJournalFileValid("getCurrentFileSeqNum");
+ return currentJournalFilePtr_->getFileSeqNum();
+}
+
+uint64_t LinearFileController::getCurrentSerial() const {
+ assertCurrentJournalFileValid("getCurrentSerial");
+ return currentJournalFilePtr_->getSerial();
+}
+
+bool LinearFileController::isEmpty() const {
+ assertCurrentJournalFileValid("isEmpty");
+ return currentJournalFilePtr_->isEmpty();
+}
+
+const std::string LinearFileController::status(const uint8_t indentDepth) const {
+ std::string indent((size_t)indentDepth, '.');
+ std::ostringstream oss;
+ oss << indent << "LinearFileController: queue=" << jcntlRef_.id() << std::endl;
+ oss << indent << " journalDirectory=" << journalDirectory_ << std::endl;
+ oss << indent << " fileSeqCounter=" << fileSeqCounter_.get() << std::endl;
+ oss << indent << " recordIdCounter=" << recordIdCounter_.get() << std::endl;
+ oss << indent << " journalFileList.size=" << journalFileList_.size() << std::endl;
+ if (checkCurrentJournalFileValid()) {
+ oss << currentJournalFilePtr_->status_str(indentDepth+2);
+ } else {
+ oss << indent << " <No current journal file>" << std::endl;
+ }
+ return oss.str();
+}
+
+// --- protected functions ---
+
+void LinearFileController::addJournalFile(const std::string& fileName,
+ const efpIdentity_t& efpIdentity,
+ const uint64_t fileSeqNumber,
+ const uint32_t completedDblkCount) {
+ JournalFile* jfp = new JournalFile(fileName, efpIdentity, fileSeqNumber, jcntlRef_.id());
+ addJournalFile(jfp, completedDblkCount, true);
+}
+
+void LinearFileController::assertCurrentJournalFileValid(const char* const functionName) const {
+ if (!checkCurrentJournalFileValid()) {
+ throw jexception(jerrno::JERR__NULL, "LinearFileController", functionName);
+ }
+}
+
+bool LinearFileController::checkCurrentJournalFileValid() const {
+ return currentJournalFilePtr_ != 0;
+}
+
+JournalFile* LinearFileController::find(const uint64_t fileSeqNumber) {
+ if (currentJournalFilePtr_ && currentJournalFilePtr_->getFileSeqNum() == fileSeqNumber)
+ return currentJournalFilePtr_;
+
+ slock l(journalFileListMutex_);
+ for (JournalFileListItr_t i=journalFileList_.begin(); i!=journalFileList_.end(); ++i) {
+ if ((*i)->getFileSeqNum() == fileSeqNumber) {
+ return *i;
+ }
+ }
+
+ std::ostringstream oss;
+ oss << "fileSeqNumber=" << fileSeqNumber;
+ throw jexception(jerrno::JERR_LFCR_SEQNUMNOTFOUND, oss.str(), "LinearFileController", "find");
+}
+
+uint64_t LinearFileController::getNextFileSeqNum() {
+ return fileSeqCounter_.increment();
+}
+
+void LinearFileController::pullEmptyFileFromEfp() {
+ std::string efn = emptyFilePoolPtr_->takeEmptyFile(journalDirectory_); // Moves file from EFP only (ie no file init), returns new file name
+ addJournalFile(efn, emptyFilePoolPtr_->getIdentity(), getNextFileSeqNum(), 0);
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h
new file mode 100644
index 0000000000..3cdfb72a37
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_
+#define QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_
+
+#include <deque>
+#include "qpid/linearstore/journal/aio.h"
+#include "qpid/linearstore/journal/AtomicCounter.h"
+#include "qpid/linearstore/journal/EmptyFilePoolTypes.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class EmptyFilePool;
+class jcntl;
+class JournalFile;
+
+class LinearFileController
+{
+protected:
+ typedef std::deque<JournalFile*> JournalFileList_t;
+ typedef JournalFileList_t::iterator JournalFileListItr_t;
+
+ jcntl& jcntlRef_;
+ std::string journalDirectory_;
+ EmptyFilePool* emptyFilePoolPtr_;
+ AtomicCounter<uint64_t> fileSeqCounter_;
+ AtomicCounter<uint64_t> recordIdCounter_;
+ AtomicCounter<uint64_t> decrCounter_;
+
+ JournalFileList_t journalFileList_;
+ JournalFile* currentJournalFilePtr_;
+ smutex journalFileListMutex_;
+
+public:
+ LinearFileController(jcntl& jcntlRef);
+ virtual ~LinearFileController();
+
+ void initialize(const std::string& journalDirectory,
+ EmptyFilePool* emptyFilePoolPtr,
+ uint64_t initialFileNumberVal);
+ void finalize();
+
+ void addJournalFile(JournalFile* journalFilePtr,
+ const uint32_t completedDblkCount,
+ const bool makeCurrentFlag);
+
+ efpDataSize_sblks_t dataSize_sblks() const;
+ efpFileSize_sblks_t fileSize_sblks() const;
+ void getNextJournalFile();
+ uint64_t getNextRecordId();
+ void removeFileToEfp(const std::string& fileName);
+ void restoreEmptyFile(const std::string& fileName);
+ void purgeEmptyFilesToEfp();
+
+ // Functions for manipulating counts of non-current JournalFile instances in journalFileList_
+ uint32_t getEnqueuedRecordCount(const uint64_t fileSeqNumber);
+ uint32_t incrEnqueuedRecordCount(const uint64_t fileSeqNumber);
+ uint32_t decrEnqueuedRecordCount(const uint64_t fileSeqNumber);
+ uint32_t addWriteCompletedDblkCount(const uint64_t fileSeqNumber,
+ const uint32_t a);
+ uint16_t decrOutstandingAioOperationCount(const uint64_t fileSeqNumber);
+
+ // Pass-through functions for current JournalFile class
+ void asyncFileHeaderWrite(io_context_t ioContextPtr,
+ const uint16_t userFlags,
+ const uint64_t recordId,
+ const uint64_t firstRecordOffset);
+ void asyncPageWrite(io_context_t ioContextPtr,
+ aio_cb* aioControlBlockPtr,
+ void* data,
+ uint32_t dataSize_dblks);
+
+ uint64_t getCurrentFileSeqNum() const;
+ uint64_t getCurrentSerial() const;
+ bool isEmpty() const;
+
+ // Debug aid
+ const std::string status(const uint8_t indentDepth) const;
+
+protected:
+ void addJournalFile(const std::string& fileName,
+ const efpIdentity_t& efpIdentity,
+ const uint64_t fileSeqNumber,
+ const uint32_t completedDblkCount);
+ void assertCurrentJournalFileValid(const char* const functionName) const;
+ bool checkCurrentJournalFileValid() const;
+ JournalFile* find(const uint64_t fileSeqNumber);
+ uint64_t getNextFileSeqNum();
+ void pullEmptyFileFromEfp();
+};
+
+typedef void (LinearFileController::*lfcAddJournalFileFn)(JournalFile* journalFilePtr,
+ const uint32_t completedDblkCount,
+ const bool makeCurrentFlag);
+
+}}}
+
+#endif // QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp
new file mode 100644
index 0000000000..254566e824
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp
@@ -0,0 +1,949 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/RecoveryManager.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <iomanip>
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/data_tok.h"
+#include "qpid/linearstore/journal/deq_rec.h"
+#include "qpid/linearstore/journal/EmptyFilePool.h"
+#include "qpid/linearstore/journal/EmptyFilePoolManager.h"
+#include "qpid/linearstore/journal/enq_map.h"
+#include "qpid/linearstore/journal/enq_rec.h"
+#include "qpid/linearstore/journal/jcfg.h"
+#include "qpid/linearstore/journal/jdir.h"
+#include "qpid/linearstore/journal/JournalFile.h"
+#include "qpid/linearstore/journal/JournalLog.h"
+#include "qpid/linearstore/journal/jrec.h"
+#include "qpid/linearstore/journal/LinearFileController.h"
+#include "qpid/linearstore/journal/txn_map.h"
+#include "qpid/linearstore/journal/txn_rec.h"
+#include "qpid/linearstore/journal/utils/enq_hdr.h"
+#include "qpid/linearstore/journal/utils/file_hdr.h"
+#include <sstream>
+#include <string>
+#include <unistd.h>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+RecoveredRecordData_t::RecoveredRecordData_t(const uint64_t rid, const uint64_t fid, const std::streampos foffs, bool ptxn) :
+ recordId_(rid),
+ fileId_(fid),
+ fileOffset_(foffs),
+ pendingTransaction_(ptxn)
+{}
+
+bool recordIdListCompare(RecoveredRecordData_t a, RecoveredRecordData_t b) {
+ return a.recordId_ < b.recordId_;
+}
+
+RecoveredFileData_t::RecoveredFileData_t(JournalFile* journalFilePtr, const uint32_t completedDblkCount) :
+ journalFilePtr_(journalFilePtr),
+ completedDblkCount_(completedDblkCount)
+{}
+
+RecoveryManager::RecoveryManager(const std::string& journalDirectory,
+ const std::string& queuename,
+ enq_map& enqueueMapRef,
+ txn_map& transactionMapRef,
+ JournalLog& journalLogRef) :
+ journalDirectory_(journalDirectory),
+ queueName_(queuename),
+ enqueueMapRef_(enqueueMapRef),
+ transactionMapRef_(transactionMapRef),
+ journalLogRef_(journalLogRef),
+ journalEmptyFlag_(false),
+ firstRecordOffset_(0),
+ endOffset_(0),
+ highestRecordId_(0ULL),
+ highestFileNumber_(0ULL),
+ lastFileFullFlag_(false),
+ initial_fid_(0),
+ currentSerial_(0),
+ efpFileSize_kib_(0)
+{}
+
+RecoveryManager::~RecoveryManager() {
+ for (fileNumberMapItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) {
+ delete i->second;
+ }
+ fileNumberMap_.clear();
+}
+
+void RecoveryManager::analyzeJournals(const std::vector<std::string>* preparedTransactionListPtr,
+ EmptyFilePoolManager* emptyFilePoolManager,
+ EmptyFilePool** emptyFilePoolPtrPtr) {
+ // Analyze file headers of existing journal files
+ efpIdentity_t efpIdentity;
+ analyzeJournalFileHeaders(efpIdentity);
+
+ if (journalEmptyFlag_) {
+ if (uninitFileList_.empty()) {
+ *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(0, 0); // Use default EFP
+ } else {
+ *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(efpIdentity);
+ }
+ } else {
+ *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(efpIdentity);
+ if (! *emptyFilePoolPtrPtr) {
+ // TODO: At a later time, this could be used to establish a new pool size provided the partition exists.
+ // If the partition does not exist, this is always an error. For now, throw an exception, as this should
+ // not occur in any practical application. Once multiple partitions and mixed EFPs are supported, this
+ // needs to be resolved. Note that EFP size is always a multiple of QLS_SBLK_SIZE_BYTES (currently 4096
+ // bytes, any other value cannot be used and should be rejected as an error.
+ std::ostringstream oss;
+ oss << "Invalid EFP identity: Partition=" << efpIdentity.pn_ << " Size=" << efpIdentity.ds_ << "k";
+ throw jexception(jerrno::JERR_RCVM_INVALIDEFPID, oss.str(), "RecoveryManager", "analyzeJournals");
+ }
+ efpFileSize_kib_ = (*emptyFilePoolPtrPtr)->fileSize_kib();
+
+ // Read all records, establish remaining enqueued records
+ if (inFileStream_.is_open()) {
+ inFileStream_.close();
+ }
+ while (getNextRecordHeader()) {}
+ if (inFileStream_.is_open()) {
+ inFileStream_.close();
+ }
+
+ // Check for file full condition
+ lastFileFullFlag_ = endOffset_ == (std::streamoff)(*emptyFilePoolPtrPtr)->fileSize_kib() * 1024;
+
+ // Remove leading files which have no enqueued records
+ removeEmptyFiles(*emptyFilePoolPtrPtr);
+
+ // Remove all txns from tmap that are not in the prepared list
+ if (preparedTransactionListPtr) {
+ std::vector<std::string> xidList;
+ transactionMapRef_.xid_list(xidList);
+ for (std::vector<std::string>::iterator itr = xidList.begin(); itr != xidList.end(); itr++) {
+ std::vector<std::string>::const_iterator pitr =
+ std::find(preparedTransactionListPtr->begin(), preparedTransactionListPtr->end(), *itr);
+ if (pitr == preparedTransactionListPtr->end()) { // not found in prepared list
+ txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(*itr); // tdl will be empty if xid not found
+ // Unlock any affected enqueues in emap
+ for (tdl_itr_t i=tdl.begin(); i<tdl.end(); i++) {
+ if (i->enq_flag_) { // enq op - decrement enqueue count
+ fileNumberMap_[i->fid_]->journalFilePtr_->decrEnqueuedRecordCount();
+ } else if (enqueueMapRef_.is_enqueued(i->drid_, true)) { // deq op - unlock enq record
+ if (enqueueMapRef_.unlock(i->drid_) < enq_map::EMAP_OK) { // fail
+ // enq_map::unlock()'s only error is enq_map::EMAP_RID_NOT_FOUND
+ std::ostringstream oss;
+ oss << std::hex << "_emap.unlock(): drid=0x\"" << i->drid_;
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "analyzeJournals");
+ }
+ }
+ }
+ }
+ }
+ }
+ prepareRecordList();
+ }
+}
+
+std::streamoff RecoveryManager::getEndOffset() const {
+ return endOffset_;
+}
+
+uint64_t RecoveryManager::getHighestFileNumber() const {
+ return highestFileNumber_;
+}
+
+uint64_t RecoveryManager::getHighestRecordId() const {
+ return highestRecordId_;
+}
+
+bool RecoveryManager::isLastFileFull() const {
+ return lastFileFullFlag_;
+}
+
+bool RecoveryManager::readNextRemainingRecord(void** const dataPtrPtr,
+ std::size_t& dataSize,
+ void** const xidPtrPtr,
+ std::size_t& xidSize,
+ bool& transient,
+ bool& external,
+ data_tok* const dtokp,
+ bool ignore_pending_txns) {
+ bool foundRecord = false;
+ do {
+ if (recordIdListConstItr_ == recordIdList_.end()) {
+ return false;
+ }
+ if (recordIdListConstItr_->pendingTransaction_ && ignore_pending_txns) { // Pending transaction
+ ++recordIdListConstItr_; // ignore, go to next record
+ } else {
+ foundRecord = true;
+ }
+ } while (!foundRecord);
+
+ if (!inFileStream_.is_open() || currentJournalFileItr_->first != recordIdListConstItr_->fileId_) {
+ if (!getFile(recordIdListConstItr_->fileId_, false)) {
+ std::ostringstream oss;
+ oss << "Failed to open file with file-id=" << recordIdListConstItr_->fileId_;
+ throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord");
+ }
+ }
+ inFileStream_.seekg(recordIdListConstItr_->fileOffset_, std::ifstream::beg);
+ if (!inFileStream_.good()) {
+ std::ostringstream oss;
+ oss << "Could not find offset 0x" << std::hex << recordIdListConstItr_->fileOffset_ << " in file " << getCurrentFileName();
+ throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord");
+ }
+
+ ::enq_hdr_t enqueueHeader;
+ inFileStream_.read((char*)&enqueueHeader, sizeof(::enq_hdr_t));
+ if (inFileStream_.gcount() != sizeof(::enq_hdr_t)) {
+ std::ostringstream oss;
+ oss << "Could not read enqueue header from file " << getCurrentFileName() << " at offset 0x" << std::hex << recordIdListConstItr_->fileOffset_;
+ throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord");
+ }
+ // check flags
+ transient = ::is_enq_transient(&enqueueHeader);
+ external = ::is_enq_external(&enqueueHeader);
+
+ // read xid
+ xidSize = enqueueHeader._xidsize;
+ *xidPtrPtr = ::malloc(xidSize);
+ if (*xidPtrPtr == 0) {
+ std::ostringstream oss;
+ oss << "xidPtr, size=0x" << std::hex << xidSize;
+ throw jexception(jerrno::JERR__MALLOC, oss.str(), "RecoveryManager", "readNextRemainingRecord");
+ }
+ readJournalData((char*)*xidPtrPtr, xidSize);
+
+ // read data
+ dataSize = enqueueHeader._dsize;
+ *dataPtrPtr = ::malloc(dataSize);
+ if (*xidPtrPtr == 0) {
+ std::ostringstream oss;
+ oss << "dataPtr, size=0x" << std::hex << dataSize;
+ throw jexception(jerrno::JERR__MALLOC, oss.str(), "RecoveryManager", "readNextRemainingRecord");
+ }
+ readJournalData((char*)*dataPtrPtr, dataSize);
+
+ // Check enqueue record checksum
+ Checksum checksum;
+ checksum.addData((const unsigned char*)&enqueueHeader, sizeof(::enq_hdr_t));
+ if (xidSize > 0) {
+ checksum.addData((const unsigned char*)*xidPtrPtr, xidSize);
+ }
+ if (dataSize > 0) {
+ checksum.addData((const unsigned char*)*dataPtrPtr, dataSize);
+ }
+ ::rec_tail_t enqueueTail;
+ readJournalData((char*)&enqueueTail, sizeof(::rec_tail_t));
+ uint32_t cs = checksum.getChecksum();
+ uint16_t res = ::rec_tail_check(&enqueueTail, &enqueueHeader._rhdr, cs);
+ if (res != 0) {
+ std::stringstream oss;
+ oss << "Bad record tail:" << std::hex;
+ if (res & ::REC_TAIL_MAGIC_ERR_MASK) {
+ oss << std::endl << " Magic: expected 0x" << ~enqueueHeader._rhdr._magic << "; found 0x" << enqueueTail._xmagic;
+ }
+ if (res & ::REC_TAIL_SERIAL_ERR_MASK) {
+ oss << std::endl << " Serial: expected 0x" << enqueueHeader._rhdr._serial << "; found 0x" << enqueueTail._serial;
+ }
+ if (res & ::REC_TAIL_RID_ERR_MASK) {
+ oss << std::endl << " Record Id: expected 0x" << enqueueHeader._rhdr._rid << "; found 0x" << enqueueTail._rid;
+ }
+ if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) {
+ oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << enqueueTail._checksum;
+ }
+ throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "RecoveryManager", "readNextRemainingRecord"); // TODO: Don't throw exception, log info
+ }
+
+ // Set data token
+ dtokp->set_wstate(data_tok::ENQ);
+ dtokp->set_rid(enqueueHeader._rhdr._rid);
+ dtokp->set_dsize(dataSize);
+ if (xidSize) {
+ dtokp->set_xid(*xidPtrPtr, xidSize);
+ }
+
+ ++recordIdListConstItr_;
+ return true;
+}
+
+void RecoveryManager::recoveryComplete() {
+ if(inFileStream_.is_open()) {
+ inFileStream_.close();
+ }
+}
+
+void RecoveryManager::setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr,
+ LinearFileController* lfcPtr) {
+ if (journalEmptyFlag_) {
+ if (uninitFileList_.size() > 0) {
+ // TODO: Handle case if uninitFileList_.size() > 1, but this should not happen in normal operation. Here we assume only one item in the list.
+ std::string uninitFile = uninitFileList_.back();
+ uninitFileList_.pop_back();
+ lfcPtr->restoreEmptyFile(uninitFile);
+ }
+ } else {
+ if (initial_fid_ == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLFID, "RecoveryManager", "setLinearFileControllerJournals");
+ }
+ for (fileNumberMapConstItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) {
+ (lfcPtr->*fnPtr)(i->second->journalFilePtr_, i->second->completedDblkCount_, i->first == initial_fid_);
+ }
+ }
+
+ std::ostringstream oss;
+ bool logFlag = !notNeededFilesList_.empty();
+ if (logFlag) {
+ oss << "Files removed from head of journal: prior truncation during recovery:";
+ }
+ while (!notNeededFilesList_.empty()) {
+ lfcPtr->removeFileToEfp(notNeededFilesList_.back());
+ oss << std::endl << " * " << notNeededFilesList_.back();
+ notNeededFilesList_.pop_back();
+ }
+ if (logFlag) {
+ journalLogRef_.log(JournalLog::LOG_NOTICE, queueName_, oss.str());
+ }
+}
+
+std::string RecoveryManager::toString(const std::string& jid, const uint16_t indent) const {
+ std::string indentStr(indent, ' ');
+ std::ostringstream oss;
+ oss << std::endl << indentStr << "Journal recovery analysis (jid=\"" << jid << "\"):" << std::endl;
+ if (journalEmptyFlag_) {
+ oss << indentStr << "<Journal empty, no journal files found>" << std::endl;
+ } else {
+ oss << indentStr << std::setw(7) << "file_id"
+ << std::setw(43) << "file_name"
+ << std::setw(12) << "record_cnt"
+ << std::setw(16) << "fro"
+ << std::setw(12) << "efp_id"
+ << std::endl;
+ oss << indentStr << std::setw(7) << "-------"
+ << std::setw(43) << "-----------------------------------------"
+ << std::setw(12) << "----------"
+ << std::setw(16) << "--------------"
+ << std::setw(12) << "----------"
+ << std::endl;
+ uint32_t totalRecordCount(0UL);
+ for (fileNumberMapConstItr_t k=fileNumberMap_.begin(); k!=fileNumberMap_.end(); ++k) {
+ std::string fqFileName = k->second->journalFilePtr_->getFqFileName();
+ std::ostringstream fid;
+ fid << std::hex << "0x" << k->first;
+ std::ostringstream fro;
+ fro << std::hex << "0x" << k->second->journalFilePtr_->getFirstRecordOffset();
+ oss << indentStr << std::setw(7) << fid.str()
+ << std::setw(43) << fqFileName.substr(fqFileName.rfind('/')+1)
+ << std::setw(12) << k->second->journalFilePtr_->getEnqueuedRecordCount()
+ << std::setw(16) << fro.str()
+ << std::setw(12) << k->second->journalFilePtr_->getEfpIdentity()
+ << std::endl;
+ totalRecordCount += k->second->journalFilePtr_->getEnqueuedRecordCount();
+ }
+ oss << indentStr << std::setw(62) << "----------" << std::endl;
+ oss << indentStr << std::setw(62) << totalRecordCount << std::endl;
+ oss << indentStr << "First record offset in first file = 0x" << std::hex << firstRecordOffset_ <<
+ std::dec << " (" << (firstRecordOffset_/QLS_DBLK_SIZE_BYTES) << " dblks)" << std::endl;
+ oss << indentStr << "End offset in last file = 0x" << std::hex << endOffset_ << std::dec << " (" <<
+ (endOffset_/QLS_DBLK_SIZE_BYTES) << " dblks)" << std::endl;
+ oss << indentStr << "Highest rid found = 0x" << std::hex << highestRecordId_ << std::dec << std::endl;
+ oss << indentStr << "Last file full = " << (lastFileFullFlag_ ? "TRUE" : "FALSE") << std::endl;
+ }
+ return oss.str();
+}
+
+// --- protected functions ---
+
+void RecoveryManager::analyzeJournalFileHeaders(efpIdentity_t& efpIdentity) {
+ std::string headerQueueName;
+ ::file_hdr_t fileHeader;
+ stringList_t directoryList;
+ jdir::read_dir(journalDirectory_, directoryList, false, true, false, true);
+ for (stringListConstItr_t i = directoryList.begin(); i != directoryList.end(); ++i) {
+ bool hdrOk = readJournalFileHeader(*i, fileHeader, headerQueueName);
+ bool hdrEmpty = ::is_file_hdr_reset(&fileHeader);
+ if (!hdrOk) {
+ std::ostringstream oss;
+ oss << "Journal file " << (*i) << " is corrupted or invalid";
+ journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss.str());
+ } else if (hdrEmpty) {
+ // Read symlink, find efp directory name which is efp size in KiB
+ // TODO: place this bit into a common function as it is also used in EmptyFilePool.cpp::deleteSymlink()
+ char buff[1024];
+ ssize_t len = ::readlink((*i).c_str(), buff, 1024);
+ if (len < 0) {
+ std::ostringstream oss;
+ oss << "symlink=\"" << (*i) << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__SYMLINK, oss.str(), "RecoveryManager", "analyzeJournalFileHeaders");
+ }
+ // Find second and third '/' from back of string, which contains the EFP directory name
+ *(::strrchr(buff, '/')) = '\0';
+ *(::strrchr(buff, '/')) = '\0';
+ int efpDataSize_kib = atoi(::strrchr(buff, '/') + 1);
+ uninitFileList_.push_back(*i);
+ efpIdentity.pn_ = fileHeader._efp_partition;
+ efpIdentity.ds_ = efpDataSize_kib;
+ } else if (headerQueueName.compare(queueName_) != 0) {
+ std::ostringstream oss;
+ oss << "Journal file " << (*i) << " belongs to queue \"" << headerQueueName << "\": ignoring";
+ journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss.str());
+ } else {
+ JournalFile* jfp = new JournalFile(*i, fileHeader, queueName_);
+ std::pair<fileNumberMapItr_t, bool> res = fileNumberMap_.insert(
+ std::pair<uint64_t, RecoveredFileData_t*>(fileHeader._file_number, new RecoveredFileData_t(jfp, 0)));
+ if (!res.second) {
+ std::ostringstream oss;
+ oss << "Journal file " << (*i) << " has fid=0x" << std::hex << jfp->getFileSeqNum() << " which already exists for this journal.";
+ throw jexception(oss.str()); // TODO: complete this exception
+ }
+ if (fileHeader._file_number > highestFileNumber_) {
+ highestFileNumber_ = fileHeader._file_number;
+ }
+ // TODO: Logic weak here for detecting error conditions in journal, specifically when no
+ // valid files exist, or files from mixed EFPs. Currently last read file header determines
+ // efpIdentity.
+ efpIdentity.pn_ = fileHeader._efp_partition;
+ efpIdentity.ds_ = fileHeader._data_size_kib;
+ }
+ }
+
+//std::cerr << "*** RecoveryManager::analyzeJournalFileHeaders() fileNumberMap_.size()=" << fileNumberMap_.size() << std::endl; // DEBUG
+ if (fileNumberMap_.empty()) {
+ journalEmptyFlag_ = true;
+ } else {
+ currentJournalFileItr_ = fileNumberMap_.begin();
+ }
+}
+
+void RecoveryManager::checkFileStreamOk(bool checkEof) {
+ if (inFileStream_.fail() || inFileStream_.bad() || checkEof ? inFileStream_.eof() : false) {
+ std::ostringstream oss;
+ oss << "Stream status: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F");
+ if (checkEof) {
+ oss << " eof=" << (inFileStream_.eof()?"T":"F");
+ }
+ throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "checkFileStreamOk");
+ }
+}
+
+void RecoveryManager::checkJournalAlignment(const uint64_t start_fid, const std::streampos recordPosition) {
+ if (recordPosition % QLS_DBLK_SIZE_BYTES != 0) {
+ std::ostringstream oss;
+ oss << "Current read pointer not dblk aligned: recordPosition=0x" << std::hex << recordPosition;
+ oss << " (dblk alignment offset = 0x" << (recordPosition % QLS_DBLK_SIZE_BYTES);
+ throw jexception(jerrno::JERR_RCVM_NOTDBLKALIGNED, oss.str(), "RecoveryManager", "checkJournalAlignment");
+ }
+ std::streampos currentPosn = recordPosition;
+ unsigned sblkOffset = currentPosn % QLS_SBLK_SIZE_BYTES;
+ if (sblkOffset)
+ {
+ std::ostringstream oss1;
+ oss1 << std::hex << "Bad record alignment found at fid=0x" << start_fid;
+ oss1 << " offs=0x" << currentPosn << " (likely journal overwrite boundary); " << std::dec;
+ oss1 << (QLS_SBLK_SIZE_DBLKS - (sblkOffset/QLS_DBLK_SIZE_BYTES)) << " filler record(s) required.";
+ journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss1.str());
+
+ fileNumberMapConstItr_t fnmItr = fileNumberMap_.find(start_fid);
+ std::ofstream outFileStream(fnmItr->second->journalFilePtr_->getFqFileName().c_str(), std::ios_base::in | std::ios_base::out | std::ios_base::binary);
+ if (!outFileStream.good()) {
+ throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "checkJournalAlignment");
+ }
+ outFileStream.seekp(currentPosn);
+
+ // Prepare write buffer containing a single empty record (1 dblk)
+ void* writeBuffer = std::malloc(QLS_DBLK_SIZE_BYTES);
+ if (writeBuffer == 0) {
+ throw jexception(jerrno::JERR__MALLOC, "RecoveryManager", "checkJournalAlignment");
+ }
+ const uint32_t xmagic = QLS_EMPTY_MAGIC;
+ ::memcpy(writeBuffer, (const void*)&xmagic, sizeof(xmagic));
+ ::memset((char*)writeBuffer + sizeof(xmagic), QLS_CLEAN_CHAR, QLS_DBLK_SIZE_BYTES - sizeof(xmagic));
+
+ // Write as many empty records as are needed to get to sblk boundary
+ while (currentPosn % QLS_SBLK_SIZE_BYTES) {
+ outFileStream.write((const char*)writeBuffer, QLS_DBLK_SIZE_BYTES);
+ if (outFileStream.fail()) {
+ throw jexception(jerrno::JERR_RCVM_WRITE, "RecoveryManager", "checkJournalAlignment");
+ }
+ std::ostringstream oss2;
+ oss2 << std::hex << "Recover phase write: Wrote filler record: fid=0x" << start_fid;
+ oss2 << " offs=0x" << currentPosn;
+ journalLogRef_.log(JournalLog::LOG_NOTICE, queueName_, oss2.str());
+ currentPosn = outFileStream.tellp();
+ }
+ outFileStream.close();
+ std::free(writeBuffer);
+ journalLogRef_.log(JournalLog::LOG_INFO, queueName_, "Bad record alignment fixed.");
+ }
+ lastRecord(start_fid, currentPosn);
+}
+
+bool RecoveryManager::decodeRecord(jrec& record,
+ std::size_t& cumulativeSizeRead,
+ ::rec_hdr_t& headerRecord,
+ const uint64_t start_fid,
+ const std::streampos recordOffset)
+{
+ if (highestRecordId_ == 0) {
+ highestRecordId_ = headerRecord._rid;
+ } else if (headerRecord._rid - highestRecordId_ < 0x8000000000000000ULL) { // RFC 1982 comparison for unsigned 64-bit
+ highestRecordId_ = headerRecord._rid;
+ }
+
+ bool done = false;
+ while (!done) {
+ try {
+ done = record.decode(headerRecord, &inFileStream_, cumulativeSizeRead, recordOffset);
+ }
+ catch (const jexception& e) {
+ if (e.err_code() == jerrno::JERR_JREC_BADRECTAIL) {
+ std::ostringstream oss;
+ oss << jerrno::err_msg(e.err_code()) << e.additional_info();
+ journalLogRef_.log(JournalLog::LOG_INFO, queueName_, oss.str());
+ } else {
+ journalLogRef_.log(JournalLog::LOG_INFO, queueName_, e.what());
+ }
+ checkJournalAlignment(start_fid, recordOffset);
+ return false;
+ }
+ if (!done && needNextFile()) {
+ if (!getNextFile(false)) {
+ checkJournalAlignment(start_fid, recordOffset);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+std::string RecoveryManager::getCurrentFileName() const {
+ return currentJournalFileItr_->second->journalFilePtr_->getFqFileName();
+}
+
+uint64_t RecoveryManager::getCurrentFileNumber() const {
+ return currentJournalFileItr_->first;
+}
+
+bool RecoveryManager::getFile(const uint64_t fileNumber, bool jumpToFirstRecordOffsetFlag) {
+ if (inFileStream_.is_open()) {
+ inFileStream_.close();
+//std::cout << " f=" << getCurrentFileName() << "]" << std::flush; // DEBUG
+ inFileStream_.clear(); // clear eof flag, req'd for older versions of c++
+ }
+ currentJournalFileItr_ = fileNumberMap_.find(fileNumber);
+ if (currentJournalFileItr_ == fileNumberMap_.end()) {
+ return false;
+ }
+ inFileStream_.open(getCurrentFileName().c_str(), std::ios_base::in | std::ios_base::binary);
+ if (!inFileStream_.good()) {
+ throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "getFile");
+ }
+//std::cout << " [F=" << getCurrentFileName() << std::flush; // DEBUG
+
+ if (!readFileHeader()) {
+ return false;
+ }
+ std::streamoff foffs = jumpToFirstRecordOffsetFlag ? firstRecordOffset_ : QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES;
+ inFileStream_.seekg(foffs);
+ return true;
+}
+
+bool RecoveryManager::getNextFile(bool jumpToFirstRecordOffsetFlag) {
+ if (fileNumberMap_.empty()) {
+ return false;
+ }
+ if (inFileStream_.is_open()) {
+ inFileStream_.close();
+//std::cout << " .f=" << getCurrentFileName() << "]" << std::flush; // DEBUG
+ currentJournalFileItr_->second->completedDblkCount_ = efpFileSize_kib_ * 1024 / QLS_DBLK_SIZE_BYTES;
+ if (++currentJournalFileItr_ == fileNumberMap_.end()) {
+ return false;
+ }
+ inFileStream_.clear(); // clear eof flag, req'd for older versions of c++
+ }
+ inFileStream_.open(getCurrentFileName().c_str(), std::ios_base::in | std::ios_base::binary);
+ if (!inFileStream_.good()) {
+ throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "getNextFile");
+ }
+//std::cout << " [.F=" << getCurrentFileName() << std::flush; // DEBUG
+
+ if (!readFileHeader()) {
+ return false;
+ }
+ std::streamoff foffs = jumpToFirstRecordOffsetFlag ? firstRecordOffset_ : QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES;
+ inFileStream_.seekg(foffs);
+ return true;
+}
+
+bool RecoveryManager::getNextRecordHeader()
+{
+ std::size_t cum_size_read = 0;
+ void* xidp = 0;
+ rec_hdr_t h;
+
+ bool hdr_ok = false;
+ uint64_t file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum();
+ std::streampos file_pos = 0;
+ if (inFileStream_.is_open()) {
+ inFileStream_.clear();
+ file_pos = inFileStream_.tellg();
+ }
+ if (file_pos == std::streampos(-1)) {
+ std::ostringstream oss;
+ oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F");
+ oss << " eof=" << (inFileStream_.eof()?"T":"F") << " good=" << (inFileStream_.good()?"T":"F");
+ oss << " rdstate=0x" << std::hex << inFileStream_.rdstate() << std::dec;
+ throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ while (!hdr_ok) {
+ if (needNextFile()) {
+ if (!getNextFile(true)) {
+ lastRecord(file_id, file_pos);
+ return false;
+ }
+ }
+ file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum();
+ file_pos = inFileStream_.tellg();
+ if (file_pos == std::streampos(-1)) {
+ std::ostringstream oss;
+ oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F");
+ oss << " eof=" << (inFileStream_.eof()?"T":"F") << " good=" << (inFileStream_.good()?"T":"F");
+ oss << " rdstate=0x" << std::hex << inFileStream_.rdstate() << std::dec;
+ throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ inFileStream_.read((char*)&h, sizeof(rec_hdr_t));
+ if (inFileStream_.gcount() == sizeof(rec_hdr_t)) {
+ hdr_ok = true;
+ } else {
+ if (needNextFile()) {
+ if (!getNextFile(true)) {
+ lastRecord(file_id, file_pos);
+ return false;
+ }
+ }
+ }
+ }
+
+ uint64_t start_fid = getCurrentFileNumber(); // fid may increment in decode() if record folds over file boundary
+ switch(h._magic) {
+ case QLS_ENQ_MAGIC:
+ {
+//std::cout << " 0x" << std::hex << file_pos << ".e.0x" << h._rid << std::dec << std::flush; // DEBUG
+ if (::rec_hdr_check(&h, QLS_ENQ_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) {
+ checkJournalAlignment(file_id, file_pos);
+ return false;
+ }
+ enq_rec er;
+ if (!decodeRecord(er, cum_size_read, h, start_fid, file_pos)) {
+ return false;
+ }
+ if (!er.is_transient()) { // Ignore transient msgs
+ fileNumberMap_[start_fid]->journalFilePtr_->incrEnqueuedRecordCount();
+ if (er.xid_size()) {
+ er.get_xid(&xidp);
+ if (xidp == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLXID, "ENQ", "RecoveryManager", "getNextRecordHeader");
+ }
+ std::string xid((char*)xidp, er.xid_size());
+ transactionMapRef_.insert_txn_data(xid, txn_data_t(h._rid, 0, start_fid, file_pos, true, false, false));
+ if (transactionMapRef_.set_aio_compl(xid, h._rid) < txn_map::TMAP_OK) { // fail - xid or rid not found
+ std::ostringstream oss;
+ oss << std::hex << "_tmap.set_aio_compl: txn_enq xid=\"" << xid << "\" rid=0x" << h._rid;
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ } else {
+ if (enqueueMapRef_.insert_pfid(h._rid, start_fid, file_pos) < enq_map::EMAP_OK) { // fail
+ // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID.
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << h._rid << " _pfid=0x" << start_fid;
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ }
+ }
+ }
+ break;
+ case QLS_DEQ_MAGIC:
+ {
+//std::cout << " 0x" << std::hex << file_pos << ".d.0x" << h._rid << std::dec << std::flush; // DEBUG
+ if (::rec_hdr_check(&h, QLS_DEQ_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) {
+ checkJournalAlignment(file_id, file_pos);
+ return false;
+ }
+ deq_rec dr;
+ if (!decodeRecord(dr, cum_size_read, h, start_fid, file_pos)) {
+ return false;
+ }
+ if (dr.xid_size()) {
+ // If the enqueue is part of a pending txn, it will not yet be in emap
+ enqueueMapRef_.lock(dr.deq_rid()); // ignore not found error
+ dr.get_xid(&xidp);
+ if (xidp == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLXID, "DEQ", "RecoveryManager", "getNextRecordHeader");
+ }
+ std::string xid((char*)xidp, dr.xid_size());
+ transactionMapRef_.insert_txn_data(xid, txn_data_t(dr.rid(), dr.deq_rid(), start_fid, file_pos,
+ false, false, dr.is_txn_coml_commit()));
+ if (transactionMapRef_.set_aio_compl(xid, dr.rid()) < txn_map::TMAP_OK) { // fail - xid or rid not found
+ std::ostringstream oss;
+ oss << std::hex << "_tmap.set_aio_compl: txn_deq xid=\"" << xid << "\" rid=0x" << dr.rid();
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ } else {
+ uint64_t enq_fid;
+ if (enqueueMapRef_.get_remove_pfid(dr.deq_rid(), enq_fid, true) == enq_map::EMAP_OK) { // ignore not found error
+ fileNumberMap_[enq_fid]->journalFilePtr_->decrEnqueuedRecordCount();
+ }
+ }
+ }
+ break;
+ case QLS_TXA_MAGIC:
+ {
+//std::cout << " 0x" << std::hex << file_pos << ".a.0x" << h._rid << std::dec << std::flush; // DEBUG
+ if (::rec_hdr_check(&h, QLS_TXA_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) {
+ checkJournalAlignment(file_id, file_pos);
+ return false;
+ }
+ txn_rec ar;
+ if (!decodeRecord(ar, cum_size_read, h, start_fid, file_pos)) {
+ return false;
+ }
+ // Delete this txn from tmap, unlock any locked records in emap
+ ar.get_xid(&xidp);
+ if (xidp == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLXID, "ABT", "RecoveryManager", "getNextRecordHeader");
+ }
+ std::string xid((char*)xidp, ar.xid_size());
+ txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(xid); // tdl will be empty if xid not found
+ for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) {
+ if (itr->enq_flag_) {
+ fileNumberMap_[itr->fid_]->journalFilePtr_->decrEnqueuedRecordCount();
+ } else {
+ enqueueMapRef_.unlock(itr->drid_); // ignore not found error
+ }
+ }
+ }
+ break;
+ case QLS_TXC_MAGIC:
+ {
+//std::cout << " 0x" << std::hex << file_pos << ".c.0x" << h._rid << std::dec << std::flush; // DEBUG
+ if (::rec_hdr_check(&h, QLS_TXC_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) {
+ checkJournalAlignment(file_id, file_pos);
+ return false;
+ }
+ txn_rec cr;
+ if (!decodeRecord(cr, cum_size_read, h, start_fid, file_pos)) {
+ return false;
+ }
+ // Delete this txn from tmap, process records into emap
+ cr.get_xid(&xidp);
+ if (xidp == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLXID, "CMT", "RecoveryManager", "getNextRecordHeader");
+ }
+ std::string xid((char*)xidp, cr.xid_size());
+ txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(xid); // tdl will be empty if xid not found
+ for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) {
+ if (itr->enq_flag_) { // txn enqueue
+//std::cout << "[rid=0x" << std::hex << itr->rid_ << std::dec << " fid=" << itr->fid_ << " fpos=0x" << std::hex << itr->foffs_ << "]" << std::dec << std::flush; // DEBUG
+ if (enqueueMapRef_.insert_pfid(itr->rid_, itr->fid_, itr->foffs_) < enq_map::EMAP_OK) { // fail
+ // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID.
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << itr->rid_ << " _pfid=0x" << itr->fid_;
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "RecoveryManager", "getNextRecordHeader");
+ }
+ } else { // txn dequeue
+ uint64_t enq_fid;
+ if (enqueueMapRef_.get_remove_pfid(itr->drid_, enq_fid, true) == enq_map::EMAP_OK) // ignore not found error
+ fileNumberMap_[enq_fid]->journalFilePtr_->decrEnqueuedRecordCount();
+ }
+ }
+ }
+ break;
+ case QLS_EMPTY_MAGIC:
+ {
+//std::cout << ".x" << std::flush; // DEBUG
+ uint32_t rec_dblks = jrec::size_dblks(sizeof(::rec_hdr_t));
+ inFileStream_.ignore(rec_dblks * QLS_DBLK_SIZE_BYTES - sizeof(::rec_hdr_t));
+ checkFileStreamOk(false);
+ if (needNextFile()) {
+ file_pos += rec_dblks * QLS_DBLK_SIZE_BYTES;
+ if (!getNextFile(false)) {
+ lastRecord(start_fid, file_pos);
+ return false;
+ }
+ }
+ }
+ break;
+ case 0:
+//std::cout << " 0x" << std::hex << file_pos << ".0" << std::dec << std::endl << std::flush; // DEBUG
+ checkJournalAlignment(getCurrentFileNumber(), file_pos);
+ return false;
+ default:
+//std::cout << " 0x" << std::hex << file_pos << ".?" << std::dec << std::endl << std::flush; // DEBUG
+ // Stop as this is the overwrite boundary.
+ checkJournalAlignment(getCurrentFileNumber(), file_pos);
+ return false;
+ }
+ return true;
+}
+
+void RecoveryManager::lastRecord(const uint64_t file_id, const std::streamoff endOffset) {
+ endOffset_ = endOffset;
+ initial_fid_ = file_id;
+ fileNumberMap_[file_id]->completedDblkCount_ = endOffset_ / QLS_DBLK_SIZE_BYTES;
+
+ // Remove any files in fileNumberMap_ beyond initial_fid_
+ fileNumberMapItr_t unwantedFirstItr = fileNumberMap_.find(file_id);
+ if (++unwantedFirstItr != fileNumberMap_.end()) {
+ fileNumberMapItr_t itr = unwantedFirstItr;
+ notNeededFilesList_.push_back(unwantedFirstItr->second->journalFilePtr_->getFqFileName());
+ while (++itr != fileNumberMap_.end()) {
+ notNeededFilesList_.push_back(itr->second->journalFilePtr_->getFqFileName());
+ delete itr->second->journalFilePtr_;
+ delete itr->second;
+ }
+ fileNumberMap_.erase(unwantedFirstItr, fileNumberMap_.end());
+ }
+}
+
+bool RecoveryManager::needNextFile() {
+ if (inFileStream_.is_open()) {
+ return inFileStream_.eof() || inFileStream_.tellg() >= std::streampos(efpFileSize_kib_ * 1024);
+ }
+ return true;
+}
+
+void RecoveryManager::prepareRecordList() {
+ // Set up recordIdList_ from enqueue map and transaction map
+ recordIdList_.clear();
+
+ // Extract records from enqueue list
+ std::vector<uint64_t> ridList;
+ enqueueMapRef_.rid_list(ridList);
+ qpid::linearstore::journal::enq_map::emap_data_struct_t eds;
+ for (std::vector<uint64_t>::const_iterator i=ridList.begin(); i!=ridList.end(); ++i) {
+ enqueueMapRef_.get_data(*i, eds);
+ recordIdList_.push_back(RecoveredRecordData_t(*i, eds._pfid, eds._file_posn, false));
+ }
+
+ // Extract records from pending transaction enqueues
+ std::vector<std::string> xidList;
+ transactionMapRef_.xid_list(xidList);
+ for (std::vector<std::string>::const_iterator j=xidList.begin(); j!=xidList.end(); ++j) {
+ qpid::linearstore::journal::txn_data_list_t tdsl = transactionMapRef_.get_tdata_list(*j);
+ for (qpid::linearstore::journal::tdl_itr_t k=tdsl.begin(); k!=tdsl.end(); ++k) {
+ if (k->enq_flag_) {
+ recordIdList_.push_back(RecoveredRecordData_t(k->rid_, k->fid_, k->foffs_, true));
+ }
+ }
+ }
+
+ std::sort(recordIdList_.begin(), recordIdList_.end(), recordIdListCompare);
+ recordIdListConstItr_ = recordIdList_.begin();
+}
+
+void RecoveryManager::readJournalData(char* target,
+ const std::streamsize readSize) {
+ std::streamoff bytesRead = 0;
+ while (bytesRead < readSize) {
+ std::streampos file_pos = inFileStream_.tellg();
+ if (file_pos == std::streampos(-1)) {
+ std::ostringstream oss;
+ oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F");
+ throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "readJournalData");
+ }
+ inFileStream_.read(target + bytesRead, readSize - bytesRead);
+ std::streamoff thisReadSize = inFileStream_.gcount();
+ if (thisReadSize < readSize) {
+ if (needNextFile()) {
+ getNextFile(false);
+ }
+ file_pos = inFileStream_.tellg();
+ if (file_pos == std::streampos(-1)) {
+ std::ostringstream oss;
+ oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F");
+ throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "readJournalData");
+ }
+ }
+ bytesRead += thisReadSize;
+ }
+}
+
+bool RecoveryManager::readFileHeader() {
+ file_hdr_t fhdr;
+ inFileStream_.read((char*)&fhdr, sizeof(fhdr));
+ checkFileStreamOk(true);
+ if (::file_hdr_check(&fhdr, QLS_FILE_MAGIC, QLS_JRNL_VERSION, efpFileSize_kib_, QLS_MAX_QUEUE_NAME_LEN) != 0) {
+ firstRecordOffset_ = fhdr._fro;
+ currentSerial_ = fhdr._rhdr._serial;
+ } else {
+ inFileStream_.close();
+ if (currentJournalFileItr_ == fileNumberMap_.begin()) {
+ journalEmptyFlag_ = true;
+ }
+ return false;
+ }
+ return true;
+}
+
+// static private
+bool RecoveryManager::readJournalFileHeader(const std::string& journalFileName,
+ ::file_hdr_t& fileHeaderRef,
+ std::string& queueName) {
+ const std::size_t headerBlockSize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024;
+ char buffer[headerBlockSize];
+ std::ifstream ifs(journalFileName.c_str(), std::ifstream::in | std::ifstream::binary);
+ if (!ifs.good()) {
+ std::ostringstream oss;
+ oss << "File=" << journalFileName;
+ throw jexception(jerrno::JERR_RCVM_OPENRD, oss.str(), "RecoveryManager", "readJournalFileHeader");
+ }
+ ifs.read(buffer, headerBlockSize);
+ if (!ifs) {
+ std::streamsize s = ifs.gcount();
+ ifs.close();
+ std::ostringstream oss;
+ oss << "File=" << journalFileName << "; attempted_read_size=" << headerBlockSize << "; actual_read_size=" << s;
+ throw jexception(jerrno::JERR_RCVM_READ, oss.str(), "RecoveryManager", "readJournalFileHeader");
+ }
+ ifs.close();
+ ::memcpy(&fileHeaderRef, buffer, sizeof(::file_hdr_t));
+ if (::file_hdr_check(&fileHeaderRef, QLS_FILE_MAGIC, QLS_JRNL_VERSION, 0, QLS_MAX_QUEUE_NAME_LEN)) {
+ return false;
+ }
+ queueName.assign(buffer + sizeof(::file_hdr_t), fileHeaderRef._queue_name_len);
+ return true;
+}
+
+void RecoveryManager::removeEmptyFiles(EmptyFilePool* emptyFilePoolPtr) {
+ while (fileNumberMap_.begin()->second->journalFilePtr_->getEnqueuedRecordCount() == 0 && fileNumberMap_.size() > 1) {
+ RecoveredFileData_t* rfdp = fileNumberMap_.begin()->second;
+ emptyFilePoolPtr->returnEmptyFileSymlink(rfdp->journalFilePtr_->getFqFileName());
+ delete rfdp->journalFilePtr_;
+ delete rfdp;
+ fileNumberMap_.erase(fileNumberMap_.begin()->first);
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h
new file mode 100644
index 0000000000..55cc6f8329
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h
@@ -0,0 +1,157 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_RECOVERYSTATE_H_
+#define QPID_LINEARSTORE_JOURNAL_RECOVERYSTATE_H_
+
+#include <fstream>
+#include <map>
+#include "qpid/linearstore/journal/LinearFileController.h"
+#include <stdint.h>
+#include <vector>
+
+struct file_hdr_t;
+struct rec_hdr_t;
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class data_tok;
+class enq_map;
+class EmptyFilePool;
+class EmptyFilePoolManager;
+class JournalLog;
+class jrec;
+class txn_map;
+
+struct RecoveredRecordData_t {
+ uint64_t recordId_;
+ uint64_t fileId_;
+ std::streampos fileOffset_;
+ bool pendingTransaction_;
+ RecoveredRecordData_t(const uint64_t rid, const uint64_t fid, const std::streampos foffs, bool ptxn);
+};
+
+struct RecoveredFileData_t {
+ JournalFile* journalFilePtr_;
+ uint32_t completedDblkCount_;
+ RecoveredFileData_t(JournalFile* journalFilePtr, const uint32_t completedDblkCount);
+};
+
+bool recordIdListCompare(RecoveredRecordData_t a, RecoveredRecordData_t b);
+
+class RecoveryManager
+{
+protected:
+ // Types
+ typedef std::vector<std::string> stringList_t;
+ typedef stringList_t::const_iterator stringListConstItr_t;
+ typedef std::map<uint64_t, RecoveredFileData_t*> fileNumberMap_t;
+ typedef fileNumberMap_t::iterator fileNumberMapItr_t;
+ typedef fileNumberMap_t::const_iterator fileNumberMapConstItr_t;
+ typedef std::vector<RecoveredRecordData_t> recordIdList_t;
+ typedef recordIdList_t::const_iterator recordIdListConstItr_t;
+
+ // Location and identity
+ const std::string journalDirectory_;
+ const std::string queueName_;
+ enq_map& enqueueMapRef_;
+ txn_map& transactionMapRef_;
+ JournalLog& journalLogRef_;
+
+ // Initial journal analysis data
+ fileNumberMap_t fileNumberMap_; ///< File number - JournalFilePtr map
+ stringList_t notNeededFilesList_; ///< Files not needed and to be returned to EFP
+ stringList_t uninitFileList_; ///< File name of uninitialized journal files found during header analysis
+ bool journalEmptyFlag_; ///< Journal data files empty
+ std::streamoff firstRecordOffset_; ///< First record offset in ffid
+ std::streamoff endOffset_; ///< End offset (first byte past last record)
+ uint64_t highestRecordId_; ///< Highest rid found
+ uint64_t highestFileNumber_; ///< Highest file number found
+ bool lastFileFullFlag_; ///< Last file is full
+ uint64_t initial_fid_; ///< File id where initial write after recovery will occur
+
+ // State for recovery of individual enqueued records
+ uint64_t currentSerial_;
+ uint32_t efpFileSize_kib_;
+ fileNumberMapConstItr_t currentJournalFileItr_;
+ std::string currentFileName_;
+ std::ifstream inFileStream_;
+ recordIdList_t recordIdList_;
+ recordIdListConstItr_t recordIdListConstItr_;
+
+public:
+ RecoveryManager(const std::string& journalDirectory,
+ const std::string& queuename,
+ enq_map& enqueueMapRef,
+ txn_map& transactionMapRef,
+ JournalLog& journalLogRef);
+ virtual ~RecoveryManager();
+
+ void analyzeJournals(const std::vector<std::string>* preparedTransactionListPtr,
+ EmptyFilePoolManager* emptyFilePoolManager,
+ EmptyFilePool** emptyFilePoolPtrPtr);
+ std::streamoff getEndOffset() const;
+ uint64_t getHighestFileNumber() const;
+ uint64_t getHighestRecordId() const;
+ bool isLastFileFull() const;
+ bool readNextRemainingRecord(void** const dataPtrPtr,
+ std::size_t& dataSize,
+ void** const xidPtrPtr,
+ std::size_t& xidSize,
+ bool& transient,
+ bool& external,
+ data_tok* const dtokp,
+ bool ignore_pending_txns);
+ void recoveryComplete();
+ void setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr,
+ LinearFileController* lfcPtr);
+ std::string toString(const std::string& jid, const uint16_t indent) const;
+protected:
+ void analyzeJournalFileHeaders(efpIdentity_t& efpIdentity);
+ void checkFileStreamOk(bool checkEof);
+ void checkJournalAlignment(const uint64_t start_fid, const std::streampos recordPosition);
+ bool decodeRecord(jrec& record,
+ std::size_t& cumulativeSizeRead,
+ ::rec_hdr_t& recordHeader,
+ const uint64_t start_fid,
+ const std::streampos recordOffset);
+ std::string getCurrentFileName() const;
+ uint64_t getCurrentFileNumber() const;
+ bool getFile(const uint64_t fileNumber, bool jumpToFirstRecordOffsetFlag);
+ bool getNextFile(bool jumpToFirstRecordOffsetFlag);
+ bool getNextRecordHeader();
+ void lastRecord(const uint64_t file_id, const std::streamoff endOffset);
+ bool needNextFile();
+ void prepareRecordList();
+ bool readFileHeader();
+ void readJournalData(char* target, const std::streamsize size);
+ void removeEmptyFiles(EmptyFilePool* emptyFilePoolPtr);
+
+ static bool readJournalFileHeader(const std::string& journalFileName,
+ ::file_hdr_t& fileHeaderRef,
+ std::string& queueName);
+};
+
+}}}
+
+#endif // QPID_LINEARSTORE_JOURNAL_RECOVERYSTATE_H_
diff --git a/qpid/cpp/src/qpid/linearstore/journal/aio.h b/qpid/cpp/src/qpid/linearstore/journal/aio.h
new file mode 100644
index 0000000000..14589e7580
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/aio.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_AIO_H
+#define QPID_LINEARSTORE_JOURNAL_AIO_H
+
+#include <libaio.h>
+#include <cstring>
+#include <stdint.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+typedef iocb aio_cb;
+typedef io_event aio_event;
+
+/**
+ * \brief This class is a C++ wrapper class for the libaio functions used by the journal. Note that only those
+ * functions used by the journal are included here. This is not a complete implementation of all libaio functions.
+ */
+class aio
+{
+public:
+ /*
+ * \brief Initialize an AIO context. Causes kernel resources to be initialized for
+ * AIO operations.
+ *
+ * \param maxevents The maximum number of events to be handled
+ * \param ctxp Pointer to context struct to be initialized
+ */
+ static inline int queue_init(int maxevents, io_context_t* ctxp)
+ {
+ return ::io_queue_init(maxevents, ctxp);
+ }
+
+ /*
+ * \brief Release an AIO context. Causes kernel resources previously initialized to
+ * be released.
+ *
+ * \param ctx AIO context struct to be released
+ */
+ static inline int queue_release(io_context_t ctx)
+ {
+ return ::io_queue_release(ctx);
+ }
+
+ /*
+ * \brief Submit asynchronous I/O blocks for processing
+ *
+ * The io_submit() system call queues nr I/O request blocks for processing in the AIO context ctx.
+ * The iocbpp argument should be an array of nr AIO control blocks, which will be submitted to context ctx.
+ *
+ * \param ctx AIO context
+ * \param nr Number of AIO operations
+ * \param aios Array of nr pointers to AIO control blocks, one for each AIO operation
+ * \return On success, io_submit() returns the number of iocbs submitted (which may be 0 if nr is zero).
+ * A negative number indicates an error:
+ * - -EAGAIN Insufficient resources are available to queue any iocbs.
+ * - -EBADF The file descriptor specified in the first iocb is invalid.
+ * - -EFAULT One of the data structures points to invalid data.
+ * - -EINVAL The AIO context specified by ctx_id is invalid. nr is less than 0. The iocb at *iocbpp[0]
+ * is not properly initialized, or the operation specified is invalid for the file descriptor
+ * in the iocb.
+ */
+ static inline int submit(io_context_t ctx, long nr, aio_cb* aios[])
+ {
+ return ::io_submit(ctx, nr, aios);
+ }
+
+ /*
+ * \brief Get list of completed AIO operations
+ *
+ * The io_getevents() system call attempts to read at least min_nr events and up to nr events from the
+ * completion queue of the AIO context specified by ctx_id. The timeout argument specifies the amount of time
+ * to wait for events, where a NULL timeout waits until at least min_nr events have been seen. Note that timeout
+ * is relative.
+ *
+ * \param ctx AIO context
+ * \param min_nr Minimum number of events to return, will wait until min_nr events are accumulated or until timeout
+ * \param nr Number of events to return
+ * \param events Pointer to array of aio_event structs, one for each completed event
+ * \param timeout Time to wait for min_nr events; 0 will cause an indefinite wait for min_nr events
+ * \return On success, number of events read: 0 if no events are available, or less than min_nr
+ * if the timeout has elapsed. A negative number indicates an error:
+ * - -EFAULT Either events or timeout is an invalid pointer.
+ * - -EINVAL ctx_id is invalid. min_nr is out of range or nr is out of range.
+ * - -EINTR Interrupted by a signal handler; see signal(7).
+ */
+ static inline int getevents(io_context_t ctx, long min_nr, long nr, aio_event* events, timespec* const timeout)
+ {
+ return ::io_getevents(ctx, min_nr, nr, events, timeout);
+ }
+
+ /**
+ * \brief This function allows iocbs to be initialized with a pointer that can be re-used. This prepares an
+ * aio_cb struct for read use. (This is a wrapper for libaio's ::io_prep_pread() function.)
+ *
+ * \param aiocbp Pointer to the aio_cb struct to be prepared.
+ * \param fd File descriptor to be used for read.
+ * \param buf Pointer to buffer in which read data is to be placed. MUST BE PAGE_ALIGNED.
+ * \param count Number of bytes to read - buffer must be large enough.
+ * \param offset Offset within file from which data will be read.
+ */
+ static inline void prep_pread(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
+ {
+ ::io_prep_pread(aiocbp, fd, buf, count, offset);
+ }
+
+ /**
+ * \brief Special version of libaio's io_prep_pread() which preserves the value of the data pointer. This allows
+ * iocbs to be initialized with a pointer that can be re-used. This prepares a aio_cb struct for read use.
+ *
+ * \param aiocbp Pointer to the aio_cb struct to be prepared.
+ * \param fd File descriptor to be used for read.
+ * \param buf Pointer to buffer in which read data is to be placed. MUST BE PAGE_ALIGNED.
+ * \param count Number of bytes to read - buffer must be large enough.
+ * \param offset Offset within file from which data will be read.
+ */
+ static inline void prep_pread_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
+ {
+ std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*));
+ aiocbp->aio_fildes = fd;
+ aiocbp->aio_lio_opcode = IO_CMD_PREAD;
+ aiocbp->aio_reqprio = 0;
+ aiocbp->u.c.buf = buf;
+ aiocbp->u.c.nbytes = count;
+ aiocbp->u.c.offset = offset;
+ }
+
+ /**
+ * \brief This function allows iocbs to be initialized with a pointer that can be re-used. This function prepares
+ * an aio_cb struct for write use. (This is a wrapper for libaio's ::io_prep_pwrite() function.)
+ *
+ * \param aiocbp Pointer to the aio_cb struct to be prepared.
+ * \param fd File descriptor to be used for write.
+ * \param buf Pointer to buffer in which data to be written is located. MUST BE PAGE_ALIGNED.
+ * \param count Number of bytes to write.
+ * \param offset Offset within file to which data will be written.
+ */
+ static inline void prep_pwrite(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
+ {
+ ::io_prep_pwrite(aiocbp, fd, buf, count, offset);
+ }
+
+ /**
+ * \brief Special version of libaio's io_prep_pwrite() which preserves the value of the data pointer. This allows
+ * iocbs to be initialized with a pointer that can be re-used. This function prepares an aio_cb struct for write
+ * use.
+ *
+ * \param aiocbp Pointer to the aio_cb struct to be prepared.
+ * \param fd File descriptor to be used for write.
+ * \param buf Pointer to buffer in which data to be written is located. MUST BE PAGE_ALIGNED.
+ * \param count Number of bytes to write.
+ * \param offset Offset within file to which data will be written.
+ */
+ static inline void prep_pwrite_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset)
+ {
+ std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*));
+ aiocbp->aio_fildes = fd;
+ aiocbp->aio_lio_opcode = IO_CMD_PWRITE;
+ aiocbp->aio_reqprio = 0;
+ aiocbp->u.c.buf = buf;
+ aiocbp->u.c.nbytes = count;
+ aiocbp->u.c.offset = offset;
+ }
+
+ /**
+ * \brief Function to check the alignment of memory.
+ *
+ * \param ptr Pointer to be checked
+ * \param byte_count Alignment count (or boundary)
+ * \returns true if ptr is aligned with byte_count, false otherwise
+ */
+ static inline bool is_aligned(const void* ptr, uint64_t byte_count)
+ {
+ return ((uintptr_t)(ptr)) % (byte_count) == 0;
+ }
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_AIO_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h b/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h
new file mode 100644
index 0000000000..f21b62617b
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H
+#define QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H
+
+#include <stdint.h>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class data_tok;
+
+class aio_callback
+{
+public:
+ virtual ~aio_callback() {}
+ virtual void wr_aio_cb(std::vector<data_tok*>& dtokl) = 0;
+ virtual void rd_aio_cb(std::vector<uint16_t>& pil) = 0;
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp b/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp
new file mode 100644
index 0000000000..3952c403a1
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp
@@ -0,0 +1,136 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/data_tok.h"
+
+#include <iomanip>
+#include "qpid/linearstore/journal/slock.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// Static members
+
+uint64_t data_tok::_cnt = 0;
+smutex data_tok::_mutex;
+
+data_tok::data_tok():
+ _wstate(NONE),
+ _dsize(0),
+ _dblks_written(0),
+ _pg_cnt(0),
+ _fid(0),
+ _rid(0),
+ _xid(),
+ _dequeue_rid(0),
+ _external_rid(false)
+{
+ slock s(_mutex);
+ _icnt = _cnt++;
+}
+
+data_tok::~data_tok() {}
+
+const char*
+data_tok::wstate_str() const
+{
+ return wstate_str(_wstate);
+}
+
+const char*
+data_tok::wstate_str(write_state wstate)
+{
+ switch (wstate)
+ {
+ case NONE:
+ return "NONE";
+ case ENQ_CACHED:
+ return "ENQ_CACHED";
+ case ENQ_PART:
+ return "ENQ_PART";
+ case ENQ_SUBM:
+ return "ENQ_SUBM";
+ case ENQ:
+ return "ENQ";
+ case DEQ_CACHED:
+ return "DEQ_CACHED";
+ case DEQ_PART:
+ return "DEQ_PART";
+ case DEQ_SUBM:
+ return "DEQ_SUBM";
+ case DEQ:
+ return "DEQ";
+ case ABORT_CACHED:
+ return "ABORT_CACHED";
+ case ABORT_PART:
+ return "ABORT_PART";
+ case ABORT_SUBM:
+ return "ABORT_SUBM";
+ case ABORTED:
+ return "ABORTED";
+ case COMMIT_CACHED:
+ return "COMMIT_CACHED";
+ case COMMIT_PART:
+ return "COMMIT_PART";
+ case COMMIT_SUBM:
+ return "COMMIT_SUBM";
+ case COMMITTED:
+ return "COMMITTED";
+ }
+ // Not using default: forces compiler to ensure all cases are covered.
+ return "<wstate unknown>";
+}
+
+void
+data_tok::reset()
+{
+ _wstate = NONE;
+ _dsize = 0;
+ _dblks_written = 0;
+ _pg_cnt = 0;
+ _fid = 0;
+ _rid = 0;
+ _xid.clear();
+}
+
+// debug aid
+std::string
+data_tok::status_str() const
+{
+ std::ostringstream oss;
+ oss << std::hex << std::setfill('0');
+ oss << "dtok id=0x" << _icnt << "; ws=" << wstate_str()/* << "; rs=" << rstate_str()*/;
+ oss << "; fid=0x" << _fid << "; rid=0x" << _rid << "; xid=";
+ for (unsigned i=0; i<_xid.size(); i++)
+ {
+ if (isprint(_xid[i]))
+ oss << _xid[i];
+ else
+ oss << "/" << std::setw(2) << (int)((char)_xid[i]);
+ }
+ oss << "; drid=0x" << _dequeue_rid << " extrid=" << (_external_rid?"T":"F");
+ oss << "; ds=0x" << _dsize << "; dw=0x" << _dblks_written/* << "; dr=0x" << _dblks_read*/;
+ oss << "; pc=0x" << _pg_cnt;
+ return oss.str();
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/data_tok.h b/qpid/cpp/src/qpid/linearstore/journal/data_tok.h
new file mode 100644
index 0000000000..67e0ec9683
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/data_tok.h
@@ -0,0 +1,133 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_DATA_TOK_H
+#define QPID_LINEARSTORE_JOURNAL_DATA_TOK_H
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+class data_tok;
+}}}
+
+#include <cassert>
+#include "qpid/linearstore/journal/smutex.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ /**
+ * \class data_tok
+ * \brief Data block token (data_tok) used to track wstate of a data block through asynchronous
+ * I/O process
+ */
+ class data_tok
+ {
+ public:
+ // TODO: Fix this, separate write state from operation
+ // ie: wstate = NONE, CACHED, PART, SUBM, COMPL
+ // op = ENQUEUE, DEQUEUE, ABORT, COMMIT
+ enum write_state
+ {
+ NONE, ///< Data block not sent to journal
+ ENQ_CACHED, ///< Data block enqueue written to page cache
+ ENQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up
+ ENQ_SUBM, ///< Data block enqueue submitted to AIO
+ ENQ, ///< Data block enqueue AIO write complete (enqueue complete)
+ DEQ_CACHED, ///< Data block dequeue written to page cache
+ DEQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up
+ DEQ_SUBM, ///< Data block dequeue submitted to AIO
+ DEQ, ///< Data block dequeue AIO write complete (dequeue complete)
+ ABORT_CACHED,
+ ABORT_PART,
+ ABORT_SUBM,
+ ABORTED,
+ COMMIT_CACHED,
+ COMMIT_PART,
+ COMMIT_SUBM,
+ COMMITTED
+ };
+
+ protected:
+ static smutex _mutex;
+ static uint64_t _cnt;
+ uint64_t _icnt;
+ write_state _wstate; ///< Enqueued / dequeued state of data
+ std::size_t _dsize; ///< Data size in bytes
+ uint32_t _dblks_written; ///< Data blocks read/written
+ uint32_t _pg_cnt; ///< Page counter - incr for each page containing part of data
+ uint64_t _fid; ///< FID containing header of enqueue record
+ uint64_t _rid; ///< RID of data set by enqueue operation
+ std::string _xid; ///< XID set by enqueue operation
+ uint64_t _dequeue_rid; ///< RID of data set by dequeue operation
+ bool _external_rid; ///< Flag to indicate external setting of rid
+
+ public:
+ data_tok();
+ virtual ~data_tok();
+
+ inline uint64_t id() const { return _icnt; }
+ inline write_state wstate() const { return _wstate; }
+ const char* wstate_str() const;
+ static const char* wstate_str(write_state wstate);
+ inline bool is_writable() const { return _wstate == NONE || _wstate == ENQ_PART; }
+ inline bool is_enqueued() const { return _wstate == ENQ; }
+ inline bool is_readable() const { return _wstate == ENQ; }
+ inline bool is_dequeueable() const { return _wstate == ENQ || _wstate == DEQ_PART; }
+ inline void set_wstate(const write_state wstate) { _wstate = wstate; }
+ inline std::size_t dsize() const { return _dsize; }
+ inline void set_dsize(std::size_t dsize) { _dsize = dsize; }
+
+ inline uint32_t dblocks_written() const { return _dblks_written; }
+ inline void incr_dblocks_written(uint32_t dblks_written)
+ { _dblks_written += dblks_written; }
+ inline void set_dblocks_written(uint32_t dblks_written) { _dblks_written = dblks_written; }
+
+ inline uint32_t pg_cnt() const { return _pg_cnt; }
+ inline uint32_t incr_pg_cnt() { return ++_pg_cnt; }
+ inline uint32_t decr_pg_cnt() { assert(_pg_cnt != 0); return --_pg_cnt; }
+
+ inline uint64_t fid() const { return _fid; }
+ inline void set_fid(const uint64_t fid) { _fid = fid; }
+ inline uint64_t rid() const { return _rid; }
+ inline void set_rid(const uint64_t rid) { _rid = rid; }
+ inline uint64_t dequeue_rid() const {return _dequeue_rid; }
+ inline void set_dequeue_rid(const uint64_t rid) { _dequeue_rid = rid; }
+ inline bool external_rid() const { return _external_rid; }
+ inline void set_external_rid(const bool external_rid) { _external_rid = external_rid; }
+
+ inline bool has_xid() const { return !_xid.empty(); }
+ inline const std::string& xid() const { return _xid; }
+ inline void clear_xid() { _xid.clear(); }
+ inline void set_xid(const std::string& xid) { _xid.assign(xid); }
+ inline void set_xid(const void* xidp, const std::size_t xid_len)
+ { _xid.assign((const char*)xidp, xid_len); }
+
+ void reset();
+
+ // debug aid
+ std::string status_str() const;
+ };
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_DATA_TOK_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp
new file mode 100644
index 0000000000..90ca27d082
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp
@@ -0,0 +1,313 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/deq_rec.h"
+
+#include <cassert>
+#include <cstring>
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/jexception.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+deq_rec::deq_rec():
+ _xidp(0),
+ _xid_buff(0)
+{
+ ::deq_hdr_init(&_deq_hdr, QLS_DEQ_MAGIC, QLS_JRNL_VERSION, 0, 0, 0, 0, 0);
+ ::rec_tail_copy(&_deq_tail, &_deq_hdr._rhdr, 0);
+}
+
+deq_rec::~deq_rec()
+{
+ clean();
+}
+
+void
+deq_rec::reset(const uint64_t serial, const uint64_t rid, const uint64_t drid, const void* const xidp,
+ const std::size_t xidlen, const bool txn_coml_commit)
+{
+ _deq_hdr._rhdr._serial = serial;
+ _deq_hdr._rhdr._rid = rid;
+ ::set_txn_coml_commit(&_deq_hdr, txn_coml_commit);
+ _deq_hdr._deq_rid = drid;
+ _deq_hdr._xidsize = xidlen;
+ _xidp = xidp;
+ _xid_buff = 0;
+ _deq_tail._serial = serial;
+ _deq_tail._rid = rid;
+ _deq_tail._checksum = 0UL;
+}
+
+uint32_t
+deq_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum)
+{
+ assert(wptr != 0);
+ assert(max_size_dblks > 0);
+ if (_xidp == 0)
+ assert(_deq_hdr._xidsize == 0);
+
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t wr_cnt = 0;
+
+ if (rec_offs_dblks) // Continuation of split dequeue record (over 2 or more pages)
+ {
+ if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required
+ {
+ rec_offs -= sizeof(_deq_hdr);
+ std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0;
+ std::size_t wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= _deq_hdr._xidsize - wsize2;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _deq_tail._checksum = checksum.getChecksum();
+ wsize = sizeof(_deq_tail) > rec_offs ? sizeof(_deq_tail) - rec_offs : 0;
+ wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= sizeof(_deq_tail) - wsize2;
+ }
+ assert(rem == 0);
+ assert(rec_offs == 0);
+ }
+ else // No further split required
+ {
+ rec_offs -= sizeof(_deq_hdr);
+ std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0;
+ if (wsize)
+ {
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+ wr_cnt += wsize;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ }
+ rec_offs -= _deq_hdr._xidsize - wsize;
+ _deq_tail._checksum = checksum.getChecksum();
+ wsize = sizeof(_deq_tail) > rec_offs ? sizeof(_deq_tail) - rec_offs : 0;
+ if (wsize)
+ {
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+#ifdef QLS_CLEAN
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ rec_offs -= sizeof(_deq_tail) - wsize;
+ assert(rec_offs == 0);
+ }
+ }
+ else // Start at beginning of data record
+ {
+ // Assumption: the header will always fit into the first dblk
+ std::memcpy(wptr, (void*)&_deq_hdr, sizeof(_deq_hdr));
+ wr_cnt = sizeof(_deq_hdr);
+ if (size_dblks(rec_size()) > max_size_dblks) // Split required - can only occur with xid
+ {
+ std::size_t wsize;
+ rem -= sizeof(_deq_hdr);
+ if (rem)
+ {
+ wsize = rem >= _deq_hdr._xidsize ? _deq_hdr._xidsize : rem;
+ std::memcpy((char*)wptr + wr_cnt, _xidp, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _deq_tail._checksum = checksum.getChecksum();
+ wsize = rem >= sizeof(_deq_tail) ? sizeof(_deq_tail) : rem;
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ assert(rem == 0);
+ }
+ else // No split required
+ {
+ if (_deq_hdr._xidsize)
+ {
+ std::memcpy((char*)wptr + wr_cnt, _xidp, _deq_hdr._xidsize);
+ wr_cnt += _deq_hdr._xidsize;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ _deq_tail._checksum = checksum.getChecksum();
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, sizeof(_deq_tail));
+ wr_cnt += sizeof(_deq_tail);
+ }
+#ifdef QLS_CLEAN
+ std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ }
+ return size_dblks(wr_cnt);
+}
+
+bool
+deq_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start)
+{
+ if (rec_offs == 0)
+ {
+ ::rec_hdr_copy(&_deq_hdr._rhdr, &h);
+ ifsp->read((char*)&_deq_hdr._deq_rid, sizeof(_deq_hdr._deq_rid));
+ ifsp->read((char*)&_deq_hdr._xidsize, sizeof(_deq_hdr._xidsize));
+ rec_offs = sizeof(::deq_hdr_t);
+ // Read header, allocate (if req'd) for xid
+ if (_deq_hdr._xidsize)
+ {
+ _xid_buff = std::malloc(_deq_hdr._xidsize);
+ MALLOC_CHK(_xid_buff, "_buff", "enq_rec", "rcv_decode");
+ }
+ }
+ if (rec_offs < sizeof(_deq_hdr) + _deq_hdr._xidsize)
+ {
+ // Read xid (or continue reading xid)
+ std::size_t offs = rec_offs - sizeof(_deq_hdr);
+ ifsp->read((char*)_xid_buff + offs, _deq_hdr._xidsize - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < _deq_hdr._xidsize - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ }
+ if (rec_offs < sizeof(_deq_hdr) +
+ (_deq_hdr._xidsize ? _deq_hdr._xidsize + sizeof(rec_tail_t) : 0))
+ {
+ // Read tail (or continue reading tail)
+ std::size_t offs = rec_offs - sizeof(_deq_hdr) - _deq_hdr._xidsize;
+ ifsp->read((char*)&_deq_tail + offs, sizeof(rec_tail_t) - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < sizeof(rec_tail_t) - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ check_rec_tail(rec_start);
+ }
+ ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size());
+ assert(!ifsp->fail() && !ifsp->bad());
+ return true;
+}
+
+std::size_t
+deq_rec::get_xid(void** const xidpp)
+{
+ if (!_xid_buff)
+ {
+ *xidpp = 0;
+ return 0;
+ }
+ *xidpp = _xid_buff;
+ return _deq_hdr._xidsize;
+}
+
+std::string&
+deq_rec::str(std::string& str) const
+{
+ std::ostringstream oss;
+ oss << "deq_rec: m=" << _deq_hdr._rhdr._magic;
+ oss << " v=" << (int)_deq_hdr._rhdr._version;
+ oss << " rid=" << _deq_hdr._rhdr._rid;
+ oss << " drid=" << _deq_hdr._deq_rid;
+ if (_xidp)
+ oss << " xid=\"" << _xidp << "\"";
+ str.append(oss.str());
+ return str;
+}
+
+std::size_t
+deq_rec::xid_size() const
+{
+ return _deq_hdr._xidsize;
+}
+
+std::size_t
+deq_rec::rec_size() const
+{
+ return sizeof(deq_hdr_t) + (_deq_hdr._xidsize ? _deq_hdr._xidsize + sizeof(rec_tail_t) : 0);
+}
+
+void
+deq_rec::check_rec_tail(const std::streampos rec_start) const {
+ Checksum checksum;
+ checksum.addData((const unsigned char*)&_deq_hdr, sizeof(::deq_hdr_t));
+ if (_deq_hdr._xidsize > 0) {
+ checksum.addData((const unsigned char*)_xid_buff, _deq_hdr._xidsize);
+ }
+ uint32_t cs = checksum.getChecksum();
+ uint16_t res = ::rec_tail_check(&_deq_tail, &_deq_hdr._rhdr, cs);
+ if (res != 0) {
+ std::stringstream oss;
+ oss << std::endl << " Record offset: 0x" << std::hex << rec_start;
+ if (res & ::REC_TAIL_MAGIC_ERR_MASK) {
+ oss << std::endl << " Magic: expected 0x" << ~_deq_hdr._rhdr._magic << "; found 0x" << _deq_tail._xmagic;
+ }
+ if (res & ::REC_TAIL_SERIAL_ERR_MASK) {
+ oss << std::endl << " Serial: expected 0x" << _deq_hdr._rhdr._serial << "; found 0x" << _deq_tail._serial;
+ }
+ if (res & ::REC_TAIL_RID_ERR_MASK) {
+ oss << std::endl << " Record Id: expected 0x" << _deq_hdr._rhdr._rid << "; found 0x" << _deq_tail._rid;
+ }
+ if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) {
+ oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _deq_tail._checksum;
+ }
+ throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "deq_rec", "check_rec_tail");
+ }
+}
+
+void
+deq_rec::clean()
+{
+ if (_xid_buff) {
+ std::free(_xid_buff);
+ _xid_buff = 0;
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h
new file mode 100644
index 0000000000..9f55032e76
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H
+#define QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H
+
+#include "qpid/linearstore/journal/jrec.h"
+#include "qpid/linearstore/journal/utils/deq_hdr.h"
+#include "qpid/linearstore/journal/utils/rec_tail.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+/**
+* \class deq_rec
+* \brief Class to handle a single journal dequeue record.
+*/
+class deq_rec : public jrec
+{
+private:
+ ::deq_hdr_t _deq_hdr; ///< Local instance of dequeue header struct
+ const void* _xidp; ///< xid pointer for encoding (writing to disk)
+ void* _xid_buff; ///< Pointer to buffer to receive xid read from disk
+ ::rec_tail_t _deq_tail; ///< Local instance of enqueue tail struct, only encoded if XID is present
+
+public:
+ deq_rec();
+ virtual ~deq_rec();
+
+ void reset(const uint64_t serial, const uint64_t rid, const uint64_t drid, const void* const xidp,
+ const std::size_t xidlen, const bool txn_coml_commit);
+ uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum);
+ bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start);
+
+ inline bool is_txn_coml_commit() const { return ::is_txn_coml_commit(&_deq_hdr); }
+ inline uint64_t rid() const { return _deq_hdr._rhdr._rid; }
+ inline uint64_t deq_rid() const { return _deq_hdr._deq_rid; }
+ std::size_t get_xid(void** const xidpp);
+ std::string& str(std::string& str) const;
+ inline std::size_t data_size() const { return 0; } // This record never carries data
+ std::size_t xid_size() const;
+ std::size_t rec_size() const;
+ void check_rec_tail(const std::streampos rec_start) const;
+
+private:
+ virtual void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp b/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp
new file mode 100644
index 0000000000..4eaaa64992
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp
@@ -0,0 +1,181 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/enq_map.h"
+
+#include "qpid/linearstore/journal/slock.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// static return/error codes
+int16_t enq_map::EMAP_DUP_RID = -3;
+int16_t enq_map::EMAP_LOCKED = -2;
+int16_t enq_map::EMAP_RID_NOT_FOUND = -1;
+int16_t enq_map::EMAP_OK = 0;
+int16_t enq_map::EMAP_FALSE = 0;
+int16_t enq_map::EMAP_TRUE = 1;
+
+enq_map::enq_map():
+ _map(){}
+
+enq_map::~enq_map() {}
+
+
+short
+enq_map::insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn)
+{
+ return insert_pfid(rid, pfid, file_posn, false);
+}
+
+short
+enq_map::insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn, const bool locked)
+{
+ std::pair<emap_itr, bool> ret;
+ emap_data_struct_t rec(pfid, file_posn, locked);
+ {
+ slock s(_mutex);
+ ret = _map.insert(emap_param(rid, rec));
+ }
+ if (ret.second == false)
+ return EMAP_DUP_RID;
+ return EMAP_OK;
+}
+
+short
+enq_map::get_pfid(const uint64_t rid, uint64_t& pfid)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ if (itr->second._lock)
+ return EMAP_LOCKED;
+ pfid = itr->second._pfid;
+ return EMAP_OK;
+}
+
+short
+enq_map::get_remove_pfid(const uint64_t rid, uint64_t& pfid, const bool txn_flag)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ if (itr->second._lock && !txn_flag) // locked, but not a commit/abort
+ return EMAP_LOCKED;
+ pfid = itr->second._pfid;
+ _map.erase(itr);
+ return EMAP_OK;
+}
+
+short
+enq_map::get_file_posn(const uint64_t rid, std::streampos& file_posn) {
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ if (itr->second._lock)
+ return EMAP_LOCKED;
+ file_posn = itr->second._file_posn;
+ return EMAP_OK;
+}
+
+short
+enq_map::get_data(const uint64_t rid, emap_data_struct_t& eds) {
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ eds._pfid = itr->second._pfid;
+ eds._file_posn = itr->second._file_posn;
+ eds._lock = itr->second._lock;
+ return EMAP_OK;
+}
+
+bool
+enq_map::is_enqueued(const uint64_t rid, bool ignore_lock)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return false;
+ if (!ignore_lock && itr->second._lock) // locked
+ return false;
+ return true;
+}
+
+short
+enq_map::lock(const uint64_t rid)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ itr->second._lock = true;
+ return EMAP_OK;
+}
+
+short
+enq_map::unlock(const uint64_t rid)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ itr->second._lock = false;
+ return EMAP_OK;
+}
+
+short
+enq_map::is_locked(const uint64_t rid)
+{
+ slock s(_mutex);
+ emap_itr itr = _map.find(rid);
+ if (itr == _map.end()) // not found in map
+ return EMAP_RID_NOT_FOUND;
+ return itr->second._lock ? EMAP_TRUE : EMAP_FALSE;
+}
+
+void
+enq_map::rid_list(std::vector<uint64_t>& rv)
+{
+ rv.clear();
+ {
+ slock s(_mutex);
+ for (emap_itr itr = _map.begin(); itr != _map.end(); itr++)
+ rv.push_back(itr->first);
+ }
+}
+
+void
+enq_map::pfid_list(std::vector<uint64_t>& fv)
+{
+ fv.clear();
+ {
+ slock s(_mutex);
+ for (emap_itr itr = _map.begin(); itr != _map.end(); itr++)
+ fv.push_back(itr->second._pfid);
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_map.h b/qpid/cpp/src/qpid/linearstore/journal/enq_map.h
new file mode 100644
index 0000000000..912a583ab9
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enq_map.h
@@ -0,0 +1,101 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H
+#define QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H
+
+#include "qpid/linearstore/journal/smutex.h"
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+/**
+* \class enq_map
+* \brief Class for storing the physical file id (pfid) and a transaction locked flag for each enqueued
+* data block using the record id (rid) as a key. This is the primary mechanism for
+* deterimining the enqueue low water mark: if a pfid exists in this map, then there is
+* at least one still-enqueued record in that file. (The transaction map must also be
+* clear, however.)
+*
+* Map rids against pfid and lock status. As records are enqueued, they are added to this
+* map, and as they are dequeued, they are removed. An enqueue is locked when a transactional
+* dequeue is pending that has been neither committed nor aborted.
+* <pre>
+* key data
+*
+* rid1 --- [ pfid, txn_lock ]
+* rid2 --- [ pfid, txn_lock ]
+* rid3 --- [ pfid, txn_lock ]
+* ...
+* </pre>
+*/
+class enq_map
+{
+public:
+ // return/error codes
+ static short EMAP_DUP_RID;
+ static short EMAP_LOCKED;
+ static short EMAP_RID_NOT_FOUND;
+ static short EMAP_OK;
+ static short EMAP_FALSE;
+ static short EMAP_TRUE;
+
+ typedef struct emap_data_struct_t {
+ uint64_t _pfid;
+ std::streampos _file_posn;
+ bool _lock;
+ emap_data_struct_t() : _pfid(0), _file_posn(0), _lock(false) {}
+ emap_data_struct_t(const uint64_t pfid, const std::streampos file_posn, const bool lock) : _pfid(pfid), _file_posn(file_posn), _lock(lock) {}
+ } emqp_data_struct_t;
+ typedef std::pair<uint64_t, emap_data_struct_t> emap_param;
+ typedef std::map<uint64_t, emap_data_struct_t> emap;
+ typedef emap::iterator emap_itr;
+
+private:
+ emap _map;
+ smutex _mutex;
+
+public:
+ enq_map();
+ virtual ~enq_map();
+
+ short insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn); // 0=ok; -3=duplicate rid;
+ short insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn, const bool locked); // 0=ok; -3=duplicate rid;
+ short get_pfid(const uint64_t rid, uint64_t& pfid); // >=0=pfid; -1=rid not found; -2=locked
+ short get_remove_pfid(const uint64_t rid, uint64_t& pfid, const bool txn_flag = false); // >=0=pfid; -1=rid not found; -2=locked
+ short get_file_posn(const uint64_t rid, std::streampos& file_posn); // -1=rid not found; -2=locked
+ short get_data(const uint64_t rid, emap_data_struct_t& eds);
+ bool is_enqueued(const uint64_t rid, bool ignore_lock = false);
+ short lock(const uint64_t rid); // 0=ok; -1=rid not found
+ short unlock(const uint64_t rid); // 0=ok; -1=rid not found
+ short is_locked(const uint64_t rid); // 1=true; 0=false; -1=rid not found
+ inline void clear() { _map.clear(); }
+ inline bool empty() const { return _map.empty(); }
+ inline uint32_t size() const { return uint32_t(_map.size()); }
+ void rid_list(std::vector<uint64_t>& rv);
+ void pfid_list(std::vector<uint64_t>& fv);
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp
new file mode 100644
index 0000000000..0fecd90cbf
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp
@@ -0,0 +1,397 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/enq_rec.h"
+
+#include <cassert>
+#include <cstring>
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/jexception.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+enq_rec::enq_rec():
+ jrec(), // superclass
+ _xidp(0),
+ _data(0),
+ _xid_buff(0),
+ _data_buff(0)
+{
+ ::enq_hdr_init(&_enq_hdr, QLS_ENQ_MAGIC, QLS_JRNL_VERSION, 0, 0, 0, 0, false);
+ ::rec_tail_copy(&_enq_tail, &_enq_hdr._rhdr, 0);
+}
+
+enq_rec::~enq_rec()
+{
+ clean();
+}
+
+void
+enq_rec::reset(const uint64_t serial, const uint64_t rid, const void* const dbuf, const std::size_t dlen,
+ const void* const xidp, const std::size_t xidlen, const bool transient, const bool external)
+{
+ _enq_hdr._rhdr._serial = serial;
+ _enq_hdr._rhdr._rid = rid;
+ ::set_enq_transient(&_enq_hdr, transient);
+ ::set_enq_external(&_enq_hdr, external);
+ _enq_hdr._xidsize = xidlen;
+ _enq_hdr._dsize = dlen;
+ _xidp = xidp;
+ _data = dbuf;
+ _enq_tail._serial = serial;
+ _enq_tail._rid = rid;
+}
+
+uint32_t
+enq_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum)
+{
+ assert(wptr != 0);
+ assert(max_size_dblks > 0);
+ if (_xidp == 0)
+ assert(_enq_hdr._xidsize == 0);
+
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t wr_cnt = 0;
+ if (rec_offs_dblks) // Continuation of split data record (over 2 or more pages)
+ {
+ if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required
+ {
+ rec_offs -= sizeof(_enq_hdr);
+ std::size_t wsize = _enq_hdr._xidsize > rec_offs ? _enq_hdr._xidsize - rec_offs : 0;
+ std::size_t wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+ wr_cnt = wsize;
+ rem -= wsize;
+ }
+ rec_offs -= _enq_hdr._xidsize - wsize2;
+ if (rem && !::is_enq_external(&_enq_hdr))
+ {
+ wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0;
+ wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= _enq_hdr._dsize - wsize2;
+ }
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _enq_tail._checksum = checksum.getChecksum();
+ wsize = sizeof(_enq_tail) > rec_offs ? sizeof(_enq_tail) - rec_offs : 0;
+ wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= sizeof(_enq_tail) - wsize2;
+ }
+ assert(rem == 0);
+ assert(rec_offs == 0);
+ }
+ else // No further split required
+ {
+ rec_offs -= sizeof(_enq_hdr);
+ std::size_t xid_wsize = _enq_hdr._xidsize > rec_offs ? _enq_hdr._xidsize - rec_offs : 0;
+ if (xid_wsize)
+ {
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, xid_wsize);
+ wr_cnt += xid_wsize;
+ }
+ rec_offs -= _enq_hdr._xidsize - xid_wsize;
+ std::size_t data_wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0;
+ if (data_wsize && !::is_enq_external(&_enq_hdr))
+ {
+ std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, data_wsize);
+ wr_cnt += data_wsize;
+ }
+ rec_offs -= _enq_hdr._dsize - data_wsize;
+ if (xid_wsize || data_wsize) {
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ }
+ _enq_tail._checksum = checksum.getChecksum();
+ std::size_t wsize = sizeof(_enq_tail) > rec_offs ? sizeof(_enq_tail) - rec_offs : 0;
+ if (wsize)
+ {
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+#ifdef QLS_CLEAN
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ rec_offs -= sizeof(_enq_tail) - wsize;
+ assert(rec_offs == 0);
+ }
+ }
+ else // Start at beginning of data record
+ {
+ // Assumption: the header will always fit into the first dblk
+ std::memcpy(wptr, (void*)&_enq_hdr, sizeof(_enq_hdr));
+ wr_cnt = sizeof(_enq_hdr);
+ if (size_dblks(rec_size()) > max_size_dblks) // Split required
+ {
+ std::size_t wsize;
+ rem -= sizeof(_enq_hdr);
+ if (rem)
+ {
+ wsize = rem >= _enq_hdr._xidsize ? _enq_hdr._xidsize : rem;
+ std::memcpy((char*)wptr + wr_cnt, _xidp, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ if (rem && !::is_enq_external(&_enq_hdr))
+ {
+ wsize = rem >= _enq_hdr._dsize ? _enq_hdr._dsize : rem;
+ std::memcpy((char*)wptr + wr_cnt, _data, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _enq_tail._checksum = checksum.getChecksum();
+ wsize = rem >= sizeof(_enq_tail) ? sizeof(_enq_tail) : rem;
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ assert(rem == 0);
+ }
+ else // No split required
+ {
+ if (_enq_hdr._xidsize)
+ {
+ std::memcpy((char*)wptr + wr_cnt, _xidp, _enq_hdr._xidsize);
+ wr_cnt += _enq_hdr._xidsize;
+ }
+ if (!::is_enq_external(&_enq_hdr))
+ {
+ std::memcpy((char*)wptr + wr_cnt, _data, _enq_hdr._dsize);
+ wr_cnt += _enq_hdr._dsize;
+ }
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ _enq_tail._checksum = checksum.getChecksum();
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, sizeof(_enq_tail));
+ wr_cnt += sizeof(_enq_tail);
+#ifdef QLS_CLEAN
+ std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ }
+ return size_dblks(wr_cnt);
+}
+
+bool
+enq_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start)
+{
+ if (rec_offs == 0)
+ {
+ // Read header, allocate (if req'd) for xid
+ ::rec_hdr_copy(&_enq_hdr._rhdr, &h);
+ ifsp->read((char*)&_enq_hdr._xidsize, sizeof(_enq_hdr._xidsize));
+ ifsp->read((char*)&_enq_hdr._dsize, sizeof(_enq_hdr._dsize));
+ rec_offs = sizeof(::enq_hdr_t);
+ if (_enq_hdr._xidsize > 0)
+ {
+ _xid_buff = std::malloc(_enq_hdr._xidsize);
+ MALLOC_CHK(_xid_buff, "_xid_buff", "enq_rec", "decode");
+ }
+ if (_enq_hdr._dsize > 0)
+ {
+ _data_buff = std::malloc(_enq_hdr._dsize);
+ MALLOC_CHK(_data_buff, "_data_buff", "enq_rec", "decode")
+ }
+ }
+ if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize)
+ {
+ // Read xid (or continue reading xid)
+ std::size_t offs = rec_offs - sizeof(_enq_hdr);
+ ifsp->read((char*)_xid_buff + offs, _enq_hdr._xidsize - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < _enq_hdr._xidsize - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ }
+ if (!::is_enq_external(&_enq_hdr))
+ {
+ if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize + _enq_hdr._dsize)
+ {
+ // Read data (or continue reading data)
+ std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize;
+ ifsp->read((char*)_data_buff + offs, _enq_hdr._dsize - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < _enq_hdr._dsize - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ }
+ }
+ if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize +
+ (::is_enq_external(&_enq_hdr) ? 0 : _enq_hdr._dsize) + sizeof(rec_tail_t))
+ {
+ // Read tail (or continue reading tail)
+ std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize;
+ if (!::is_enq_external(&_enq_hdr))
+ offs -= _enq_hdr._dsize;
+ ifsp->read((char*)&_enq_tail + offs, sizeof(rec_tail_t) - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < sizeof(rec_tail_t) - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ check_rec_tail(rec_start);
+ }
+ ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size());
+ assert(!ifsp->fail() && !ifsp->bad());
+ return true;
+}
+
+std::size_t
+enq_rec::get_xid(void** const xidpp)
+{
+ if (!_xid_buff || !_enq_hdr._xidsize) {
+ *xidpp = 0;
+ return 0;
+ }
+ *xidpp = _xid_buff;
+ return _enq_hdr._xidsize;
+}
+
+std::size_t
+enq_rec::get_data(void** const datapp)
+{
+ if (!_data_buff) {
+ *datapp = 0;
+ return 0;
+ }
+ if (::is_enq_external(&_enq_hdr))
+ *datapp = 0;
+ else
+ *datapp = _data_buff;
+ return _enq_hdr._dsize;
+}
+
+std::string&
+enq_rec::str(std::string& str) const
+{
+ std::ostringstream oss;
+ oss << "enq_rec: m=" << _enq_hdr._rhdr._magic;
+ oss << " v=" << (int)_enq_hdr._rhdr._version;
+ oss << " rid=" << _enq_hdr._rhdr._rid;
+ if (_xidp)
+ oss << " xid=\"" << _xidp << "\"";
+ oss << " len=" << _enq_hdr._dsize;
+ str.append(oss.str());
+ return str;
+}
+
+std::size_t
+enq_rec::rec_size() const
+{
+ return rec_size(_enq_hdr._xidsize, _enq_hdr._dsize, ::is_enq_external(&_enq_hdr));
+}
+
+std::size_t
+enq_rec::rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external)
+{
+ if (external)
+ return sizeof(enq_hdr_t) + xidsize + sizeof(rec_tail_t);
+ return sizeof(enq_hdr_t) + xidsize + dsize + sizeof(rec_tail_t);
+}
+
+void
+enq_rec::check_rec_tail(const std::streampos rec_start) const {
+ Checksum checksum;
+ checksum.addData((const unsigned char*)&_enq_hdr, sizeof(::enq_hdr_t));
+ if (_enq_hdr._xidsize > 0) {
+ checksum.addData((const unsigned char*)_xid_buff, _enq_hdr._xidsize);
+ }
+ if (_enq_hdr._dsize > 0) {
+ checksum.addData((const unsigned char*)_data_buff, _enq_hdr._dsize);
+ }
+ uint32_t cs = checksum.getChecksum();
+ uint16_t res = ::rec_tail_check(&_enq_tail, &_enq_hdr._rhdr, cs);
+ if (res != 0) {
+ std::stringstream oss;
+ oss << std::endl << " Record offset: 0x" << std::hex << rec_start;
+ if (res & ::REC_TAIL_MAGIC_ERR_MASK) {
+ oss << std::endl << " Magic: expected 0x" << ~_enq_hdr._rhdr._magic << "; found 0x" << _enq_tail._xmagic;
+ }
+ if (res & ::REC_TAIL_SERIAL_ERR_MASK) {
+ oss << std::endl << " Serial: expected 0x" << _enq_hdr._rhdr._serial << "; found 0x" << _enq_tail._serial;
+ }
+ if (res & ::REC_TAIL_RID_ERR_MASK) {
+ oss << std::endl << " Record Id: expected 0x" << _enq_hdr._rhdr._rid << "; found 0x" << _enq_tail._rid;
+ }
+ if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) {
+ oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _enq_tail._checksum;
+ }
+ throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "enq_rec", "check_rec_tail");
+ }
+}
+
+void
+enq_rec::clean() {
+ if (_xid_buff) {
+ std::free(_xid_buff);
+ _xid_buff = 0;
+ }
+ if (_data_buff) {
+ std::free(_data_buff);
+ _data_buff = 0;
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h
new file mode 100644
index 0000000000..d85cde42f5
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h
@@ -0,0 +1,74 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_ENQ_REC_H
+#define QPID_LINEARSTORE_JOURNAL_ENQ_REC_H
+
+#include "qpid/linearstore/journal/jrec.h"
+#include "qpid/linearstore/journal/utils/enq_hdr.h"
+#include "qpid/linearstore/journal/utils/rec_tail.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+/**
+* \class enq_rec
+* \brief Class to handle a single journal enqueue record.
+*/
+class enq_rec : public jrec
+{
+private:
+ ::enq_hdr_t _enq_hdr; ///< Local instance of enqueue header struct
+ const void* _xidp; ///< xid pointer for encoding (for writing to disk)
+ const void* _data; ///< Pointer to data to be written to disk
+ void* _xid_buff;
+ void* _data_buff;
+ ::rec_tail_t _enq_tail; ///< Local instance of enqueue tail struct
+
+public:
+ enq_rec();
+ virtual ~enq_rec();
+
+ void reset(const uint64_t serial, const uint64_t rid, const void* const dbuf, const std::size_t dlen,
+ const void* const xidp, const std::size_t xidlen, const bool transient, const bool external);
+ uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum);
+ bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start);
+
+ std::size_t get_xid(void** const xidpp);
+ std::size_t get_data(void** const datapp);
+ inline bool is_transient() const { return ::is_enq_transient(&_enq_hdr); }
+ inline bool is_external() const { return ::is_enq_external(&_enq_hdr); }
+ std::string& str(std::string& str) const;
+ inline std::size_t data_size() const { return _enq_hdr._dsize; }
+ inline std::size_t xid_size() const { return _enq_hdr._xidsize; }
+ std::size_t rec_size() const;
+ static std::size_t rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external);
+ inline uint64_t rid() const { return _enq_hdr._rhdr._rid; }
+ void check_rec_tail(const std::streampos rec_start) const;
+
+private:
+ virtual void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENQ_REC_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enums.h b/qpid/cpp/src/qpid/linearstore/journal/enums.h
new file mode 100644
index 0000000000..90ec355955
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enums.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_ENUMS_H
+#define QPID_LINEARSTORE_JOURNAL_ENUMS_H
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// TODO: Change this to flags, as multiple of these conditions may exist simultaneously
+/**
+* \brief Enumeration of possible return states from journal read and write operations.
+*/
+enum _iores
+{
+ RHM_IORES_SUCCESS = 0, ///< Success: IO operation completed noramlly.
+ RHM_IORES_PAGE_AIOWAIT, ///< IO operation suspended - next page is waiting for AIO.
+ RHM_IORES_FILE_AIOWAIT, ///< IO operation suspended - next file is waiting for AIO.
+ RHM_IORES_EMPTY, ///< During read operations, nothing further is available to read.
+ RHM_IORES_TXPENDING ///< Operation blocked by pending transaction.
+};
+typedef _iores iores;
+
+static inline const char* iores_str(iores res)
+{
+ switch (res)
+ {
+ case RHM_IORES_SUCCESS: return "RHM_IORES_SUCCESS";
+ case RHM_IORES_PAGE_AIOWAIT: return "RHM_IORES_PAGE_AIOWAIT";
+ case RHM_IORES_FILE_AIOWAIT: return "RHM_IORES_FILE_AIOWAIT";
+ case RHM_IORES_EMPTY: return "RHM_IORES_EMPTY";
+ case RHM_IORES_TXPENDING: return "RHM_IORES_TXPENDING";
+ }
+ return "<iores unknown>";
+}
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENUMS_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcfg.h b/qpid/cpp/src/qpid/linearstore/journal/jcfg.h
new file mode 100644
index 0000000000..b33a419a9d
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jcfg.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <cmath>
+#include <cstdlib>
+
+#ifndef QPID_QLS_JRNL_JCFG_H
+#define QPID_QLS_JRNL_JCFG_H
+
+#define QLS_SBLK_SIZE_BYTES 4096 /**< Disk softblock size in bytes, should match size used on disk media */
+#define QLS_AIO_ALIGN_BOUNDARY_BYTES QLS_SBLK_SIZE_BYTES /** Memory alignment boundary used for DMA */
+/**
+* <b>Rule:</b> Data block size (QLS_DBLK_SIZE_BYTES) MUST be a power of 2 AND
+* a power of 2 factor of the disk softblock size (QLS_SBLK_SIZE_BYTES):
+* <pre>
+* n * QLS_DBLK_SIZE_BYTES == QLS_SBLK_SIZE_BYTES (n = 1,2,4,8...)
+* </pre>
+*/
+#define QLS_DBLK_SIZE_BYTES 128 /**< Data block size in bytes (CANNOT BE LESS THAN 32!) */
+#define QLS_SBLK_SIZE_DBLKS (QLS_SBLK_SIZE_BYTES / QLS_DBLK_SIZE_BYTES) /**< Disk softblock size in multiples of QLS_DBLK_SIZE_BYTES */
+#define QLS_SBLK_SIZE_KIB (QLS_SBLK_SIZE_BYTES / 1024) /**< Disk softblock size in KiB */
+
+#define QLS_WMGR_DEF_PAGE_SIZE_KIB 32 /**< Journal write page size in KiB (default) */
+#define QLS_WMGR_DEF_PAGE_SIZE_SBLKS (QLS_WMGR_DEF_PAGE_SIZE_KIB / QLS_SBLK_SIZE_KIB) /**< Journal write page size in softblocks (default) */
+#define QLS_WMGR_DEF_PAGES 32 /**< Number of pages to use in wmgr (default) */
+
+#define QLS_WMGR_MAXDTOKPP 1024 /**< Max. dtoks (data blocks) per page in wmgr */
+#define QLS_WMGR_MAXWAITUS 100 /**< Max. wait time (us) before submitting AIO */
+
+#define QLS_JRNL_FILE_EXTENSION ".jrnl" /**< Extension for journal data files */
+#define QLS_TXA_MAGIC 0x61534c51 /**< ("QLSa" in little endian) Magic for dtx abort hdrs */
+#define QLS_TXC_MAGIC 0x63534c51 /**< ("QLSc" in little endian) Magic for dtx commit hdrs */
+#define QLS_DEQ_MAGIC 0x64534c51 /**< ("QLSd" in little endian) Magic for deq rec hdrs */
+#define QLS_ENQ_MAGIC 0x65534c51 /**< ("QLSe" in little endian) Magic for enq rec hdrs */
+#define QLS_FILE_MAGIC 0x66534c51 /**< ("QLSf" in little endian) Magic for file hdrs */
+#define QLS_EMPTY_MAGIC 0x78534c51 /**< ("QLSx" in little endian) Magic for empty dblk */
+#define QLS_JRNL_VERSION 2 /**< Version (of file layout) */
+#define QLS_JRNL_FHDR_RES_SIZE_SBLKS 1 /**< Journal file header reserved size in sblks (as defined by QLS_SBLK_SIZE_BYTES) */
+#define QLS_MAX_QUEUE_NAME_LEN (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES) - sizeof(file_hdr_t)
+
+#define QLS_CLEAN /**< If defined, writes QLS_CLEAN_CHAR to all filled areas on disk */
+#define QLS_CLEAN_CHAR 0xff /**< Char used to clear empty space on disk */
+
+namespace qpid {
+namespace linearstore {
+
+ const int QLS_RAND_WIDTH = (int)(::log((RAND_MAX + 1ULL))/::log(2));
+ const int QLS_RAND_SHIFT1 = 64 - QLS_RAND_WIDTH;
+ const int QLS_RAND_SHIFT2 = QLS_RAND_SHIFT1 - QLS_RAND_WIDTH;
+ const int QLS_RAND_MASK = (int)::pow(2, QLS_RAND_SHIFT2) - 1;
+
+}}
+
+#endif /* ifndef QPID_QLS_JRNL_JCFG_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp b/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp
new file mode 100644
index 0000000000..cc31f2e1df
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp
@@ -0,0 +1,440 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/jcntl.h"
+
+#include <iomanip>
+#include "qpid/linearstore/journal/data_tok.h"
+#include "qpid/linearstore/journal/JournalLog.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+#define AIO_CMPL_TIMEOUT_SEC 5
+#define AIO_CMPL_TIMEOUT_NSEC 0
+#define FINAL_AIO_CMPL_TIMEOUT_SEC 15
+#define FINAL_AIO_CMPL_TIMEOUT_NSEC 0
+
+// Static
+timespec jcntl::_aio_cmpl_timeout; ///< Timeout for blocking libaio returns
+timespec jcntl::_final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing
+bool jcntl::_init = init_statics();
+bool jcntl::init_statics()
+{
+ _aio_cmpl_timeout.tv_sec = AIO_CMPL_TIMEOUT_SEC;
+ _aio_cmpl_timeout.tv_nsec = AIO_CMPL_TIMEOUT_NSEC;
+ _final_aio_cmpl_timeout.tv_sec = FINAL_AIO_CMPL_TIMEOUT_SEC;
+ _final_aio_cmpl_timeout.tv_nsec = FINAL_AIO_CMPL_TIMEOUT_NSEC;
+ return true;
+}
+
+
+// Functions
+
+jcntl::jcntl(const std::string& jid,
+ const std::string& jdir,
+ JournalLog& jrnl_log):
+ _jid(jid),
+ _jdir(jdir),
+ _init_flag(false),
+ _stop_flag(false),
+ _readonly_flag(false),
+ _jrnl_log(jrnl_log),
+ _linearFileController(*this),
+ _emptyFilePoolPtr(0),
+ _emap(),
+ _tmap(),
+ _wmgr(this, _emap, _tmap, _linearFileController),
+ _recoveryManager(_jdir.dirname(), _jid, _emap, _tmap, jrnl_log)
+{}
+
+jcntl::~jcntl()
+{
+ if (_init_flag && !_stop_flag)
+ try { stop(true); }
+ catch (const jexception& e) { std::cerr << e << std::endl; }
+ _linearFileController.finalize();
+}
+
+void
+jcntl::initialize(EmptyFilePool* efpp,
+ const uint16_t wcache_num_pages,
+ const uint32_t wcache_pgsize_sblks,
+ aio_callback* const cbp)
+{
+ _init_flag = false;
+ _stop_flag = false;
+ _readonly_flag = false;
+
+ _emap.clear();
+ _tmap.clear();
+
+ _linearFileController.finalize();
+ _jdir.clear_dir(); // Clear any existing journal files
+ _linearFileController.initialize(_jdir.dirname(), efpp, 0ULL);
+ _linearFileController.getNextJournalFile();
+ _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, QLS_WMGR_MAXDTOKPP, QLS_WMGR_MAXWAITUS, 0);
+ _init_flag = true;
+}
+
+void
+jcntl::recover(EmptyFilePoolManager* efpmp,
+ const uint16_t wcache_num_pages,
+ const uint32_t wcache_pgsize_sblks,
+ aio_callback* const cbp,
+ const std::vector<std::string>* prep_txn_list_ptr,
+ uint64_t& highest_rid)
+{
+ _init_flag = false;
+ _stop_flag = false;
+ _readonly_flag = false;
+
+ _emap.clear();
+ _tmap.clear();
+
+ _linearFileController.finalize();
+
+ // Verify journal dir and journal files
+ _jdir.verify_dir();
+ _recoveryManager.analyzeJournals(prep_txn_list_ptr, efpmp, &_emptyFilePoolPtr);
+ assert(_emptyFilePoolPtr != 0);
+
+ highest_rid = _recoveryManager.getHighestRecordId();
+ _jrnl_log.log(/*LOG_DEBUG*/JournalLog::LOG_INFO, _jid, _recoveryManager.toString(_jid, 5U));
+ _linearFileController.initialize(_jdir.dirname(), _emptyFilePoolPtr, _recoveryManager.getHighestFileNumber());
+ _recoveryManager.setLinearFileControllerJournals(&qpid::linearstore::journal::LinearFileController::addJournalFile, &_linearFileController);
+ if (_recoveryManager.isLastFileFull()) {
+ _linearFileController.getNextJournalFile();
+ }
+ _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, QLS_WMGR_MAXDTOKPP, QLS_WMGR_MAXWAITUS,
+ (_recoveryManager.isLastFileFull() ? 0 : _recoveryManager.getEndOffset()));
+
+ _readonly_flag = true;
+ _init_flag = true;
+}
+
+void
+jcntl::recover_complete()
+{
+ if (!_readonly_flag)
+ throw jexception(jerrno::JERR_JCNTL_NOTRECOVERED, "jcntl", "recover_complete");
+ _recoveryManager.recoveryComplete();
+ _readonly_flag = false;
+}
+
+void
+jcntl::delete_jrnl_files()
+{
+ stop(true); // wait for AIO to complete
+ _linearFileController.purgeEmptyFilesToEfp();
+ _jdir.delete_dir();
+}
+
+
+iores
+jcntl::enqueue_data_record(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const bool transient)
+{
+ iores r;
+ check_wstatus("enqueue_data_record");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, 0, 0, false, transient, false), r,
+ dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::enqueue_extern_data_record(const std::size_t tot_data_len,
+ data_tok* dtokp,
+ const bool transient)
+{
+ iores r;
+ check_wstatus("enqueue_extern_data_record");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.enqueue(0, tot_data_len, 0, dtokp, 0, 0, false, transient, true), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::enqueue_txn_data_record(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool transient)
+{
+ iores r;
+ check_wstatus("enqueue_tx_data_record");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, xid.data(), xid.size(),
+ tpc_flag, transient, false), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::enqueue_extern_txn_data_record(const std::size_t tot_data_len,
+ data_tok* dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool transient)
+{
+ iores r;
+ check_wstatus("enqueue_extern_txn_data_record");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.enqueue(0, tot_data_len, 0, dtokp, xid.data(), xid.size(), tpc_flag, transient,
+ true), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::read_data_record(void** const datapp,
+ std::size_t& dsize,
+ void** const xidpp,
+ std::size_t& xidsize,
+ bool& transient,
+ bool& external,
+ data_tok* const dtokp,
+ bool ignore_pending_txns)
+{
+ check_rstatus("read_data");
+ if (_recoveryManager.readNextRemainingRecord(datapp, dsize, xidpp, xidsize, transient, external, dtokp, ignore_pending_txns)) {
+ return RHM_IORES_SUCCESS;
+ }
+ return RHM_IORES_EMPTY;
+}
+
+iores
+jcntl::dequeue_data_record(data_tok* const dtokp,
+ const bool txn_coml_commit)
+{
+ iores r;
+ check_wstatus("dequeue_data");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.dequeue(dtokp, 0, 0, false, txn_coml_commit), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::dequeue_txn_data_record(data_tok* const dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool txn_coml_commit)
+{
+ iores r;
+ check_wstatus("dequeue_data");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.dequeue(dtokp, xid.data(), xid.size(), tpc_flag, txn_coml_commit), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::txn_abort(data_tok* const dtokp,
+ const std::string& xid)
+{
+ iores r;
+ check_wstatus("txn_abort");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.abort(dtokp, xid.data(), xid.size()), r, dtokp)) ;
+ }
+ return r;
+}
+
+iores
+jcntl::txn_commit(data_tok* const dtokp,
+ const std::string& xid)
+{
+ iores r;
+ check_wstatus("txn_commit");
+ {
+ slock s(_wr_mutex);
+ while (handle_aio_wait(_wmgr.commit(dtokp, xid.data(), xid.size()), r, dtokp)) ;
+ }
+ return r;
+}
+
+bool
+jcntl::is_txn_synced(const std::string& xid)
+{
+ slock s(_wr_mutex);
+ bool res = _wmgr.is_txn_synced(xid);
+ return res;
+}
+
+int32_t
+jcntl::get_wr_events(timespec* const timeout)
+{
+ stlock t(_wr_mutex);
+ if (!t.locked())
+ return jerrno::LOCK_TAKEN;
+ return _wmgr.get_events(timeout, false);
+}
+
+void
+jcntl::stop(const bool block_till_aio_cmpl)
+{
+ if (_readonly_flag)
+ check_rstatus("stop");
+ else
+ check_wstatus("stop");
+ _stop_flag = true;
+ if (!_readonly_flag)
+ flush(block_till_aio_cmpl);
+}
+
+LinearFileController&
+jcntl::getLinearFileControllerRef() {
+ return _linearFileController;
+}
+
+// static
+std::string
+jcntl::str2hexnum(const std::string& str) {
+ if (str.empty()) {
+ return "<null>";
+ }
+ std::ostringstream oss;
+ oss << "(" << str.size() << ")0x" << std::hex;
+ for (unsigned i=str.size(); i>0; --i) {
+ oss << std::setfill('0') << std::setw(2) << (uint16_t)(uint8_t)str[i-1];
+ }
+ return oss.str();
+}
+
+iores
+jcntl::flush(const bool block_till_aio_cmpl)
+{
+ if (!_init_flag)
+ return RHM_IORES_SUCCESS;
+ if (_readonly_flag)
+ throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", "flush");
+ iores res;
+ {
+ slock s(_wr_mutex);
+ res = _wmgr.flush();
+ }
+ if (block_till_aio_cmpl)
+ aio_cmpl_wait();
+ return res;
+}
+
+// Protected/Private functions
+
+void
+jcntl::check_wstatus(const char* fn_name) const
+{
+ if (!_init_flag)
+ throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name);
+ if (_readonly_flag)
+ throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", fn_name);
+ if (_stop_flag)
+ throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", fn_name);
+}
+
+void
+jcntl::check_rstatus(const char* fn_name) const
+{
+ if (!_init_flag)
+ throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name);
+ if (_stop_flag)
+ throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", fn_name);
+}
+
+
+void
+jcntl::aio_cmpl_wait()
+{
+ //while (_wmgr.get_aio_evt_rem())
+ while (true)
+ {
+ uint32_t aer;
+ {
+ slock s(_wr_mutex);
+ aer = _wmgr.get_aio_evt_rem();
+ }
+ if (aer == 0) break; // no events left
+ if (get_wr_events(&_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT)
+ throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "aio_cmpl_wait");
+ }
+}
+
+
+bool
+jcntl::handle_aio_wait(const iores res, iores& resout, const data_tok* dtp)
+{
+ resout = res;
+ if (res == RHM_IORES_PAGE_AIOWAIT)
+ {
+ while (_wmgr.curr_pg_blocked())
+ {
+ if (_wmgr.get_aio_evt_rem() == 0) {
+//std::cout << "&&&&&& jcntl::handle_aio_wait() " << _wmgr.status_str() << std::endl; // DEBUG
+ throw jexception("_wmgr.curr_pg_blocked() with no events remaining"); // TODO - complete exception
+ }
+ if (_wmgr.get_events(&_aio_cmpl_timeout, false) == jerrno::AIO_TIMEOUT)
+ {
+ std::ostringstream oss;
+ oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str();
+ _jrnl_log.log(JournalLog::LOG_CRITICAL, _jid, oss.str());
+ throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait");
+ }
+ }
+ return true;
+ }
+ else if (res == RHM_IORES_FILE_AIOWAIT)
+ {
+// while (_wmgr.curr_file_blocked())
+// {
+// if (_wmgr.get_events(pmgr::UNUSED, &_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT)
+// {
+// std::ostringstream oss;
+// oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str();
+// this->log(LOG_CRITICAL, oss.str());
+// throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait");
+// }
+// }
+// _wrfc.wr_reset();
+ resout = RHM_IORES_SUCCESS;
+ data_tok::write_state ws = dtp->wstate();
+ return ws == data_tok::ENQ_PART || ws == data_tok::DEQ_PART || ws == data_tok::ABORT_PART ||
+ ws == data_tok::COMMIT_PART;
+ }
+ return false;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcntl.h b/qpid/cpp/src/qpid/linearstore/journal/jcntl.h
new file mode 100644
index 0000000000..94c00d2fab
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jcntl.h
@@ -0,0 +1,570 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JCNTL_H
+#define QPID_LINEARSTORE_JOURNAL_JCNTL_H
+
+#include <qpid/linearstore/journal/LinearFileController.h>
+#include "qpid/linearstore/journal/jdir.h"
+#include "qpid/linearstore/journal/RecoveryManager.h"
+#include "qpid/linearstore/journal/wmgr.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class EmptyFilePool;
+class EmptyFilePoolManager;
+class JournalLog;
+
+/**
+* \brief Access and control interface for the journal. This is the top-level class for the
+* journal.
+*
+* This is the top-level journal class; one instance of this class controls one instance of the
+* journal and all its files and associated control structures. Besides this class, the only
+* other class that needs to be used at a higher level is the data_tok class, one instance of
+* which is used per data block written to the journal, and is used to track its status through
+* the AIO enqueue, read and dequeue process.
+*/
+class jcntl
+{
+protected:
+ /**
+ * \brief Journal ID
+ *
+ * This string uniquely identifies this journal instance. It will most likely be associated
+ * with the identity of the message queue with which it is associated.
+ */
+ // TODO: This is not included in any files at present, add to file_hdr?
+ std::string _jid;
+
+ /**
+ * \brief Journal directory
+ *
+ * This string stores the path to the journal directory. It may be absolute or relative, and
+ * should not end in a file separator character. (e.g. "/fastdisk/jdata" is correct,
+ * "/fastdisk/jdata/" is not.)
+ */
+ jdir _jdir;
+
+ /**
+ * \brief Initialized flag
+ *
+ * This flag starts out set to false, is set to true once this object has been initialized,
+ * either by calling initialize() or recover().
+ */
+ bool _init_flag;
+
+ /**
+ * \brief Stopped flag
+ *
+ * This flag starts out false, and is set to true when stop() is called. At this point, the
+ * journal will no longer accept messages until either initialize() or recover() is called.
+ * There is no way other than through initialization to reset this flag.
+ */
+ // TODO: It would be helpful to distinguish between states stopping and stopped. If stop(true) is called,
+ // then we are stopping, but must wait for all outstanding aios to return before being finally stopped. During
+ // this period, however, no new enqueue/dequeue/read requests may be accepted.
+ bool _stop_flag;
+
+ /**
+ * \brief Read-only state flag used during recover.
+ *
+ * When true, this flag prevents journal write operations (enqueue and dequeue), but
+ * allows read to occur. It is used during recovery, and is reset when recovered() is
+ * called.
+ */
+ bool _readonly_flag;
+
+ // Journal control structures
+ JournalLog& _jrnl_log; ///< Ref to Journal Log instance
+ LinearFileController _linearFileController; ///< Linear File Controller
+ EmptyFilePool* _emptyFilePoolPtr; ///< Pointer to Empty File Pool for this queue
+ enq_map _emap; ///< Enqueue map for low water mark management
+ txn_map _tmap; ///< Transaction map open transactions
+ wmgr _wmgr; ///< Write page manager which manages AIO
+ RecoveryManager _recoveryManager; ///< Recovery data used for recovery
+ smutex _wr_mutex; ///< Mutex for journal writes
+
+public:
+ static timespec _aio_cmpl_timeout; ///< Timeout for blocking libaio returns
+ static timespec _final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing
+
+ /**
+ * \brief Journal constructor.
+ *
+ * Constructor which sets the physical file location and base name.
+ *
+ * \param jid A unique identifier for this journal instance.
+ * \param jdir The directory which will contain the journal files.
+ * \param base_filename The string which will be used to start all journal filenames.
+ */
+ jcntl(const std::string& jid,
+ const std::string& jdir,
+ JournalLog& jrnl_log);
+
+ /**
+ * \brief Destructor.
+ */
+ virtual ~jcntl();
+
+ inline const std::string& id() const { return _jid; }
+
+ inline const std::string& jrnl_dir() const { return _jdir.dirname(); }
+
+ /**
+ * \brief Initialize the journal for storing data.
+ *
+ * Initialize the journal by creating new journal data files and initializing internal
+ * control structures. When complete, the journal will be empty, and ready to store data.
+ *
+ * <b>NOTE: Any existing journal will be ignored by this operation.</b> To use recover
+ * the data from an existing journal, use recover().
+ *
+ * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created
+ * and deleted.</b>
+ *
+ * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be
+ * used.</b>
+ *
+ * \param num_jfiles The number of journal files to be created.
+ * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically
+ * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then
+ * no files are added and an exception will be thrown if the journal runs out of file space.
+ * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the
+ * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If
+ * this number of files exist and the journal runs out of space, an exception will be thrown. This number
+ * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a
+ * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled.
+ * \param jfsize_sblks The size of each journal file expressed in softblocks.
+ * \param wcache_num_pages The number of write cache pages to create.
+ * \param wcache_pgsize_sblks The size in sblks of each write cache page.
+ * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL).
+ *
+ * \exception TODO
+ */
+ void initialize(EmptyFilePool* efpp,
+ const uint16_t wcache_num_pages,
+ const uint32_t wcache_pgsize_sblks,
+ aio_callback* const cbp);
+
+ /**
+ * /brief Initialize journal by recovering state from previously written journal.
+ *
+ * Initialize journal by recovering state from previously written journal. The journal files
+ * are analyzed, and all records that have not been dequeued and that remain in the journal
+ * will be available for reading. The journal is placed in a read-only state until
+ * recovered() is called; any calls to enqueue or dequeue will fail with an exception
+ * in this state.
+ *
+ * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created
+ * and deleted.</b>
+ *
+ * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be
+ * used.</b>
+ *
+ * \param num_jfiles The number of journal files to be created.
+ * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically
+ * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then
+ * no files are added and an exception will be thrown if the journal runs out of file space.
+ * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the
+ * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If
+ * this number of files exist and the journal runs out of space, an exception will be thrown. This number
+ * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a
+ * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled.
+ * \param jfsize_sblks The size of each journal file expressed in softblocks.
+ * \param wcache_num_pages The number of write cache pages to create.
+ * \param wcache_pgsize_sblks The size in sblks of each write cache page.
+ * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL).
+ * \param prep_txn_list_ptr
+ * \param highest_rid Returns the highest rid found in the journal during recover
+ *
+ * \exception TODO
+ */
+ void recover(EmptyFilePoolManager* efpm,
+ const uint16_t wcache_num_pages,
+ const uint32_t wcache_pgsize_sblks,
+ aio_callback* const cbp,
+ const std::vector<std::string>* prep_txn_list_ptr,
+ uint64_t& highest_rid);
+
+ /**
+ * \brief Notification to the journal that recovery is complete and that normal operation
+ * may resume.
+ *
+ * This call notifies the journal that recovery is complete and that normal operation
+ * may resume. The read pointers are reset so that all records read as a part of recover
+ * may be re-read during normal operation. The read-only flag is then reset, allowing
+ * enqueue and dequeue operations to resume.
+ *
+ * \exception TODO
+ */
+ void recover_complete();
+
+ /**
+ * \brief Stops journal and deletes all journal files.
+ *
+ * Clear the journal directory of all journal files matching the base filename.
+ *
+ * \exception TODO
+ */
+ void delete_jrnl_files();
+
+ /**
+ * \brief Enqueue data.
+ *
+ * Enqueue data or part thereof. If a large data block is being written, then it may be
+ * enqueued in parts by setting this_data_len to the size of the data being written in this
+ * call. The total data size must be known in advance, however, as this is written into the
+ * record header on the first record write. The state of the write (i.e. how much has been
+ * written so far) is maintained in the data token dtokp. Partial writes will return in state
+ * ENQ_PART.
+ *
+ * Note that a return value of anything other than RHM_IORES_SUCCESS implies that this write
+ * operation did not complete successfully or was partially completed. The action taken under
+ * these conditions depends on the value of the return. For example, RHM_IORES_AIO_WAIT
+ * implies that all pages in the write page cache are waiting for AIO operations to return,
+ * and that the call should be remade after waiting a bit.
+ *
+ * Example: If a write of 99 kB is divided into three equal parts, then the following states
+ * and returns would characterize a successful operation:
+ * <pre>
+ * dtok. dtok. dtok.
+ * Pperation Return wstate() dsize() written() Comment
+ * -----------------+--------+--------+-------+---------+------------------------------------
+ * NONE 0 0 Value of dtok before op
+ * edr(99000, 33000) SUCCESS ENQ_PART 99000 33000 Enqueue part 1
+ * edr(99000, 33000) AIO_WAIT ENQ_PART 99000 50000 Enqueue part 2, not completed
+ * edr(99000, 33000) SUCCESS ENQ_PART 99000 66000 Enqueue part 2 again
+ * edr(99000, 33000) SUCCESS ENQ 99000 99000 Enqueue part 3
+ * </pre>
+ *
+ * \param data_buff Pointer to data to be enqueued for this enqueue operation.
+ * \param tot_data_len Total data length.
+ * \param this_data_len Amount to be written in this enqueue operation.
+ * \param dtokp Pointer to data token which contains the details of the enqueue operation.
+ * \param transient Flag indicating transient persistence (ie, ignored on recover).
+ *
+ * \exception TODO
+ */
+ iores enqueue_data_record(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const bool transient);
+
+ iores enqueue_extern_data_record(const std::size_t tot_data_len,
+ data_tok* dtokp,
+ const bool transient);
+
+ /**
+ * \brief Enqueue data.
+ *
+ * \param data_buff Pointer to data to be enqueued for this enqueue operation.
+ * \param tot_data_len Total data length.
+ * \param this_data_len Amount to be written in this enqueue operation.
+ * \param dtokp Pointer to data token which contains the details of the enqueue operation.
+ * \param xid String containing xid. An empty string (i.e. length=0) will be considered
+ * non-transactional.
+ * \param transient Flag indicating transient persistence (ie, ignored on recover).
+ *
+ * \exception TODO
+ */
+ iores enqueue_txn_data_record(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool transient);
+
+ iores enqueue_extern_txn_data_record(const std::size_t tot_data_len,
+ data_tok* dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool transient);
+
+ /**
+ * \brief Reads data from the journal. It is the responsibility of the reader to free
+ * the memory that is allocated through this call - see below for details.
+ *
+ * Reads the next non-dequeued data record from the journal.
+ *
+ * <b>Note</b> that this call allocates memory into which the data and XID are copied. It
+ * is the responsibility of the caller to free this memory. The memory for the data and
+ * XID are allocated in a single call, and the XID precedes the data in the memory space.
+ * Thus, where an XID exists, freeing the XID pointer will free both the XID and data memory.
+ * However, if an XID does not exist for the message, the XID pointer xidpp is set to NULL,
+ * and it is the data pointer datapp that must be freed. Should neither an XID nor data be
+ * present (ie an empty record), then no memory is allocated, and both pointers will be NULL.
+ * In this case, there is no need to free memory.
+ *
+ * TODO: Fix this lousy interface. The caller should NOT be required to clean up these
+ * pointers! Rather use a struct, or better still, let the data token carry the data and
+ * xid pointers and lengths, and have the data token both allocate and delete.
+ *
+ * \param datapp Pointer to pointer that will be set to point to memory allocated and
+ * containing the data. Will be set to NULL if the call fails or there is no data
+ * in the record.
+ * \param dsize Ref that will be set to the size of the data. Will be set to 0 if the call
+ * fails or if there is no data in the record.
+ * \param xidpp Pointer to pointer that will be set to point to memory allocated and
+ * containing the XID. Will be set to NULL if the call fails or there is no XID attached
+ * to this record.
+ * \param xidsize Ref that will be set to the size of the XID.
+ * \param transient Ref that will be set true if record is transient.
+ * \param external Ref that will be set true if record is external. In this case, the data
+ * pointer datapp will be set to NULL, but dsize will contain the size of the data.
+ * NOTE: If there is an xid, then xidpp must be freed.
+ * \param dtokp Pointer to data_tok instance for this data, used to track state of data
+ * through journal.
+ * \param ignore_pending_txns When false (default), if the next record to be read is locked
+ * by a pending transaction, the read fails with RHM_IORES_TXPENDING. However, if set
+ * to true, then locks are ignored. This is required for reading of the Transaction
+ * Prepared List (TPL) which may have its entries locked, but may be read from
+ * time-to-time, and needs all its records (locked and unlocked) to be available.
+ *
+ * \exception TODO
+ */
+ iores read_data_record(void** const datapp,
+ std::size_t& dsize,
+ void** const xidpp,
+ std::size_t& xidsize,
+ bool& transient,
+ bool& external,
+ data_tok* const dtokp,
+ bool ignore_pending_txns);
+
+ /**
+ * \brief Dequeues (marks as no longer needed) data record in journal.
+ *
+ * Dequeues (marks as no longer needed) data record in journal. Note that it is possible
+ * to use the same data token instance used to enqueue this data; it contains the record ID
+ * needed to correctly mark this data as dequeued in the journal. Otherwise the RID of the
+ * record to be dequeued and the write state of ENQ must be manually set in a new or reset
+ * instance of data_tok.
+ *
+ * \param dtokp Pointer to data_tok instance for this data, used to track state of data
+ * through journal.
+ * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing
+ * prepared XID list items, sets whether the complete() was called in commit or abort
+ * mode.
+ *
+ * \exception TODO
+ */
+ iores dequeue_data_record(data_tok* const dtokp,
+ const bool txn_coml_commit);
+
+ /**
+ * \brief Dequeues (marks as no longer needed) data record in journal.
+ *
+ * Dequeues (marks as no longer needed) data record in journal as part of a transaction.
+ * Note that it is possible to use the same data token instance used to enqueue this data;
+ * it contains the RID needed to correctly mark this data as dequeued in the journal.
+ * Otherwise the RID of the record to be dequeued and the write state of ENQ must be
+ * manually set in a new or reset instance of data_tok.
+ *
+ * \param dtokp Pointer to data_tok instance for this data, used to track state of data
+ * through journal.
+ * \param xid String containing xid. An empty string (i.e. length=0) will be considered
+ * non-transactional.
+ * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing
+ * prepared XID list items, sets whether the complete() was called in commit or abort
+ * mode.
+ *
+ * \exception TODO
+ */
+ iores dequeue_txn_data_record(data_tok* const dtokp,
+ const std::string& xid,
+ const bool tpc_flag,
+ const bool txn_coml_commit);
+
+ /**
+ * \brief Abort the transaction for all records enqueued or dequeued with the matching xid.
+ *
+ * Abort the transaction for all records enqueued with the matching xid. All enqueued records
+ * are effectively deleted from the journal, and can not be read. All dequeued records remain
+ * as though they had never been dequeued.
+ *
+ * \param dtokp Pointer to data_tok instance for this data, used to track state of data
+ * through journal.
+ * \param xid String containing xid.
+ *
+ * \exception TODO
+ */
+ iores txn_abort(data_tok* const dtokp,
+ const std::string& xid);
+
+ /**
+ * \brief Commit the transaction for all records enqueued or dequeued with the matching xid.
+ *
+ * Commit the transaction for all records enqueued with the matching xid. All enqueued
+ * records are effectively released for reading and dequeueing. All dequeued records are
+ * removed and can no longer be accessed.
+ *
+ * \param dtokp Pointer to data_tok instance for this data, used to track state of data
+ * through journal.
+ * \param xid String containing xid.
+ *
+ * \exception TODO
+ */
+ iores txn_commit(data_tok* const dtokp,
+ const std::string& xid);
+
+ /**
+ * \brief Check whether all the enqueue records for the given xid have reached disk.
+ *
+ * \param xid String containing xid.
+ *
+ * \exception TODO
+ */
+ bool is_txn_synced(const std::string& xid);
+
+ /**
+ * \brief Forces a check for returned AIO write events.
+ *
+ * Forces a check for returned AIO write events. This is normally performed by enqueue() and
+ * dequeue() operations, but if these operations cease, then this call needs to be made to
+ * force the processing of any outstanding AIO operations.
+ */
+ int32_t get_wr_events(timespec* const timeout);
+
+ /**
+ * \brief Stop the journal from accepting any further requests to read or write data.
+ *
+ * This operation is used to stop the journal. This is the normal mechanism for bringing the
+ * journal to an orderly stop. Any outstanding AIO operations or partially written pages in
+ * the write page cache will by flushed and will complete.
+ *
+ * <b>Note:</b> The journal cannot be restarted without either initializing it or restoring
+ * it.
+ *
+ * \param block_till_aio_cmpl If true, will block the thread while waiting for all
+ * outstanding AIO operations to complete.
+ */
+ void stop(const bool block_till_aio_cmpl);
+
+ /**
+ * \brief Force a flush of the write page cache, creating a single AIO write operation.
+ */
+ iores flush(const bool block_till_aio_cmpl);
+
+ inline uint32_t get_enq_cnt() const { return _emap.size(); } // TODO: _emap: Thread safe?
+
+ inline uint32_t get_wr_aio_evt_rem() const { slock l(_wr_mutex); return _wmgr.get_aio_evt_rem(); }
+
+ uint32_t get_wr_outstanding_aio_dblks() const;
+
+ uint32_t get_rd_outstanding_aio_dblks() const;
+
+ LinearFileController& getLinearFileControllerRef();
+
+ /**
+ * \brief Check if a particular rid is enqueued. Note that this function will return
+ * false if the rid is transactionally enqueued and is not committed, or if it is
+ * locked (i.e. transactionally dequeued, but the dequeue has not been committed).
+ */
+ inline bool is_enqueued(const uint64_t rid, bool ignore_lock) { return _emap.is_enqueued(rid, ignore_lock); }
+
+ inline bool is_locked(const uint64_t rid) {
+ if (_emap.is_enqueued(rid, true) < enq_map::EMAP_OK)
+ return false;
+ return _emap.is_locked(rid) == enq_map::EMAP_TRUE;
+ }
+
+ inline void enq_rid_list(std::vector<uint64_t>& rids) { _emap.rid_list(rids); }
+
+ inline void enq_xid_list(std::vector<std::string>& xids) { _tmap.xid_list(xids); }
+
+ inline uint32_t get_open_txn_cnt() const { return _tmap.size(); }
+
+ // TODO Make this a const, but txn_map must support const first.
+ inline txn_map& get_txn_map() { return _tmap; }
+
+ /**
+ * \brief Check if the journal is stopped.
+ *
+ * \return <b><i>true</i></b> if the jouranl is stopped;
+ * <b><i>false</i></b> otherwise.
+ */
+ inline bool is_stopped() { return _stop_flag; }
+
+ /**
+ * \brief Check if the journal is ready to read and write data.
+ *
+ * Checks if the journal is ready to read and write data. This function will return
+ * <b><i>true</i></b> if the journal has been either initialized or restored, and the stop()
+ * function has not been called since the initialization.
+ *
+ * Note that the journal may also be stopped if an internal error occurs (such as running out
+ * of data journal file space).
+ *
+ * \return <b><i>true</i></b> if the journal is ready to read and write data;
+ * <b><i>false</i></b> otherwise.
+ */
+ inline bool is_ready() const { return _init_flag && !_stop_flag; }
+
+ inline bool is_read_only() const { return _readonly_flag; }
+
+ /**
+ * \brief Get the journal directory.
+ *
+ * This returns the journal directory as set during initialization. This is the directory
+ * into which the journal files will be written.
+ */
+ inline const std::string& dirname() const { return _jdir.dirname(); }
+
+ // Management instrumentation callbacks
+ inline virtual void instr_incr_outstanding_aio_cnt() {}
+ inline virtual void instr_decr_outstanding_aio_cnt() {}
+
+ static std::string str2hexnum(const std::string& str);
+
+protected:
+ static bool _init;
+ static bool init_statics();
+
+ /**
+ * \brief Check status of journal before allowing write operations.
+ */
+ void check_wstatus(const char* fn_name) const;
+
+ /**
+ * \brief Check status of journal before allowing read operations.
+ */
+ void check_rstatus(const char* fn_name) const;
+
+ /**
+ * \brief Call that blocks while waiting for all outstanding AIOs to complete
+ */
+ void aio_cmpl_wait();
+
+ /**
+ * \brief Call that blocks until at least one message returns; used to wait for
+ * AIO wait conditions to clear.
+ */
+ bool handle_aio_wait(const iores res, iores& resout, const data_tok* dtp);
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_JCNTL_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp b/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp
new file mode 100644
index 0000000000..72b94d0098
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp
@@ -0,0 +1,457 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/jdir.h"
+
+#include <cstring>
+#include <cerrno>
+#include <iomanip>
+#include "qpid/linearstore/journal/jexception.h"
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+jdir::jdir(const std::string& dirname/*, const std::string& _base_filename*/):
+ _dirname(dirname)/*,
+ _base_filename(_base_filename)*/
+{}
+
+jdir::~jdir()
+{}
+
+// === create_dir ===
+
+void
+jdir::create_dir()
+{
+ create_dir(_dirname);
+}
+
+
+void
+jdir::create_dir(const char* dirname)
+{
+ create_dir(std::string(dirname));
+}
+
+
+void
+jdir::create_dir(const std::string& dirname)
+{
+ std::size_t fdp = dirname.find_last_of('/');
+ if (fdp != std::string::npos)
+ {
+ std::string parent_dir = dirname.substr(0, fdp);
+ if (!exists(parent_dir))
+ create_dir(parent_dir);
+ }
+ if (::mkdir(dirname.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH))
+ {
+ if (errno != EEXIST) // Dir exists, ignore
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_dir");
+ }
+ }
+}
+
+
+// === clear_dir ===
+
+void
+jdir::clear_dir(const bool create_flag)
+{
+ clear_dir(_dirname/*, _base_filename*/, create_flag);
+}
+
+void
+jdir::clear_dir(const char* dirname/*, const char* base_filename*/, const bool create_flag)
+{
+ clear_dir(std::string(dirname)/*, std::string(base_filename)*/, create_flag);
+}
+
+
+void
+jdir::clear_dir(const std::string& dirname/*, const std::string&
+#ifndef RHM_JOWRITE
+ base_filename
+#endif
+*/
+ , const bool create_flag)
+{
+ DIR* dir = open_dir(dirname, "clear_dir", true);
+ if (!dir && create_flag) {
+ create_dir(dirname);
+ dir = open_dir(dirname, "clear_dir", true);
+ }
+//#ifndef RHM_JOWRITE
+ struct dirent* entry;
+ bool found = false;
+ std::string bak_dir;
+ while ((entry = ::readdir(dir)) != 0)
+ {
+ // Ignore . and ..
+ if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0)
+ {
+ if (std::strlen(entry->d_name) >= 3) // 'bak'
+ {
+ if (std::strncmp(entry->d_name, "bak", 3) == 0)
+ {
+ if (!found)
+ {
+ bak_dir = create_bak_dir(dirname/*, base_filename*/);
+ found = true;
+ }
+ std::ostringstream oldname;
+ oldname << dirname << "/" << entry->d_name;
+ std::ostringstream newname;
+ newname << bak_dir << "/" << entry->d_name;
+ if (::rename(oldname.str().c_str(), newname.str().c_str()))
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "file=\"" << oldname.str() << "\" dest=\"" <<
+ newname.str() << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "clear_dir");
+ }
+ }
+ }
+ }
+ }
+// FIXME: Find out why this fails with false alarms/errors from time to time...
+// While commented out, there is no error capture from reading dir entries.
+// check_err(errno, dir, dirname, "clear_dir");
+//#endif
+ close_dir(dir, dirname, "clear_dir");
+}
+
+// === push_down ===
+
+std::string
+jdir::push_down(const std::string& dirname, const std::string& target_dir/*, const std::string& bak_dir_base*/)
+{
+ std::string bak_dir_name = create_bak_dir(dirname/*, bak_dir_base*/);
+
+ DIR* dir = open_dir(dirname, "push_down", false);
+ // Copy contents of targetDirName into bak dir
+ struct dirent* entry;
+ while ((entry = ::readdir(dir)) != 0)
+ {
+ // Search for targetDirName in storeDirName
+ if (std::strcmp(entry->d_name, target_dir.c_str()) == 0)
+ {
+ std::ostringstream oldname;
+ oldname << dirname << "/" << target_dir;
+ std::ostringstream newname;
+ newname << bak_dir_name << "/" << target_dir;
+ if (::rename(oldname.str().c_str(), newname.str().c_str()))
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "file=\"" << oldname.str() << "\" dest=\"" << newname.str() << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "push_down");
+ }
+ break;
+ }
+ }
+ close_dir(dir, dirname, "push_down");
+ return bak_dir_name;
+}
+
+// === verify_dir ===
+
+void
+jdir::verify_dir()
+{
+ verify_dir(_dirname/*, _base_filename*/);
+}
+
+void
+jdir::verify_dir(const char* dirname/*, const char* base_filename*/)
+{
+ verify_dir(std::string(dirname)/*, std::string(base_filename)*/);
+}
+
+
+void
+jdir::verify_dir(const std::string& dirname/*, const std::string& base_filename*/)
+{
+ if (!is_dir(dirname))
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dirname << "\"";
+ throw jexception(jerrno::JERR_JDIR_NOTDIR, oss.str(), "jdir", "verify_dir");
+ }
+
+ // Read jinf file, then verify all journal files are present
+// jinf ji(dirname + "/" + base_filename + "." + QLS_JRNL_FILE_EXTENSION, true);
+// for (uint16_t fnum=0; fnum < ji.num_jfiles(); fnum++)
+// {
+// std::ostringstream oss;
+// oss << dirname << "/" << base_filename << ".";
+// oss << std::setw(4) << std::setfill('0') << std::hex << fnum;
+// oss << "." << QLS_JRNL_FILE_EXTENSION;
+// if (!exists(oss.str()))
+// throw jexception(jerrno::JERR_JDIR_NOSUCHFILE, oss.str(), "jdir", "verify_dir");
+// }
+}
+
+
+// === delete_dir ===
+
+void
+jdir::delete_dir(bool children_only)
+{
+ delete_dir(_dirname, children_only);
+}
+
+void
+jdir::delete_dir(const char* dirname, bool children_only)
+{
+ delete_dir(std::string(dirname), children_only);
+}
+
+void
+jdir::delete_dir(const std::string& dirname, bool children_only)
+{
+ struct dirent* entry;
+ struct stat s;
+ DIR* dir = open_dir(dirname, "delete_dir", true); // true = allow dir does not exist, return 0
+ if (!dir) return;
+ while ((entry = ::readdir(dir)) != 0)
+ {
+ // Ignore . and ..
+ if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0)
+ {
+ std::string full_name(dirname + "/" + entry->d_name);
+ if (::lstat(full_name.c_str(), &s))
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "stat: file=\"" << full_name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "delete_dir");
+ }
+ if (S_ISREG(s.st_mode) || S_ISLNK(s.st_mode)) // This is a file or slink
+ {
+ if(::unlink(full_name.c_str()))
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "unlink: file=\"" << entry->d_name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_UNLINK, oss.str(), "jdir", "delete_dir");
+ }
+ }
+ else if (S_ISDIR(s.st_mode)) // This is a dir
+ {
+ delete_dir(full_name);
+ }
+ else // all other types, throw up!
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "file=\"" << entry->d_name << "\" is not a dir, file or slink.";
+ oss << " (mode=0x" << std::hex << s.st_mode << std::dec << ")";
+ throw jexception(jerrno::JERR_JDIR_BADFTYPE, oss.str(), "jdir", "delete_dir");
+ }
+ }
+ }
+
+// FIXME: Find out why this fails with false alarms/errors from time to time...
+// While commented out, there is no error capture from reading dir entries.
+// check_err(errno, dir, dirname, "delete_dir");
+ // Now dir is empty, close and delete it
+ close_dir(dir, dirname, "delete_dir");
+
+ if (!children_only)
+ if (::rmdir(dirname.c_str()))
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_RMDIR, oss.str(), "jdir", "delete_dir");
+ }
+}
+
+
+std::string
+jdir::create_bak_dir(const std::string& dirname)
+{
+ DIR* dir = open_dir(dirname, "create_bak_dir", false);
+ long dir_num = 0L;
+ struct dirent* entry;
+ while ((entry = ::readdir(dir)) != 0)
+ {
+ // Ignore . and ..
+ if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0)
+ {
+ if (std::strlen(entry->d_name) == 9) // Format: _bak.XXXX
+ {
+ if (std::strncmp(entry->d_name, "_bak.", 5) == 0)
+ {
+ long this_dir_num = std::strtol(entry->d_name + 5, 0, 16);
+ if (this_dir_num > dir_num)
+ dir_num = this_dir_num;
+ }
+ }
+ }
+ }
+// FIXME: Find out why this fails with false alarms/errors from time to time...
+// While commented out, there is no error capture from reading dir entries.
+// check_err(errno, dir, dirname, "create_bak_dir");
+ close_dir(dir, dirname, "create_bak_dir");
+
+ std::ostringstream dn;
+ dn << dirname << "/_bak." << std::hex << std::setw(4) << std::setfill('0') << ++dir_num;
+ if (::mkdir(dn.str().c_str(), S_IRWXU | S_IRWXG | S_IROTH))
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dn.str() << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_bak_dir");
+ }
+ return std::string(dn.str());
+}
+
+bool
+jdir::is_dir(const char* name)
+{
+ struct stat s;
+ if (::stat(name, &s))
+ {
+ std::ostringstream oss;
+ oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "is_dir");
+ }
+ return S_ISDIR(s.st_mode);
+}
+
+bool
+jdir::is_dir(const std::string& name)
+{
+ return is_dir(name.c_str());
+}
+
+bool
+jdir::exists(const char* name)
+{
+ struct stat s;
+ if (::stat(name, &s))
+ {
+ if (errno == ENOENT) // No such dir or file
+ return false;
+ // Throw for any other condition
+ std::ostringstream oss;
+ oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "exists");
+ }
+ return true;
+}
+
+bool
+jdir::exists(const std::string& name)
+{
+ return exists(name.c_str());
+}
+
+void
+jdir::read_dir(const std::string& name, std::vector<std::string>& dir_list, const bool incl_dirs, const bool incl_files, const bool incl_links, const bool return_fqfn) {
+ struct stat s;
+ if (is_dir(name)) {
+ DIR* dir = open_dir(name, "read_dir", false);
+ struct dirent* entry;
+ while ((entry = ::readdir(dir)) != 0) {
+ if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) { // Ignore . and ..
+ std::string full_name(name + "/" + entry->d_name);
+ if (::stat(full_name.c_str(), &s))
+ {
+ ::closedir(dir);
+ std::ostringstream oss;
+ oss << "stat: file=\"" << full_name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "delete_dir");
+ }
+ if ((S_ISREG(s.st_mode) && incl_files) || (S_ISDIR(s.st_mode) && incl_dirs) || (S_ISLNK(s.st_mode) && incl_links)) {
+ if (return_fqfn) {
+ dir_list.push_back(name + "/" + entry->d_name);
+ } else {
+ dir_list.push_back(entry->d_name);
+ }
+ }
+ }
+ }
+ close_dir(dir, name, "read_dir");
+ }
+}
+
+void
+jdir::check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name)
+{
+ if (err_num)
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(err_num);
+ ::closedir(dir); // Try to close, it makes no sense to trap errors here...
+ throw jexception(jerrno::JERR_JDIR_READDIR, oss.str(), "jdir", fn_name);
+ }
+}
+
+void
+jdir::close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name)
+{
+ if (::closedir(dir))
+ {
+ std::ostringstream oss;
+ oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_CLOSEDIR, oss.str(), "jdir", fn_name);
+ }
+}
+
+DIR*
+jdir::open_dir(const std::string& dir_name, const std::string& fn_name, const bool test_enoent)
+{
+ DIR* dir = ::opendir(dir_name.c_str());
+ if (!dir) {
+ if (test_enoent && errno == ENOENT) {
+ return 0;
+ }
+ std::ostringstream oss;
+ oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", fn_name);
+ }
+ return dir;
+}
+
+std::ostream&
+operator<<(std::ostream& os, const jdir& jdir)
+{
+ os << jdir._dirname;
+ return os;
+}
+
+std::ostream&
+operator<<(std::ostream& os, const jdir* jdirPtr)
+{
+ os << jdirPtr->_dirname;
+ return os;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jdir.h b/qpid/cpp/src/qpid/linearstore/journal/jdir.h
new file mode 100644
index 0000000000..59f21ce499
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jdir.h
@@ -0,0 +1,362 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JDIR_H
+#define QPID_LINEARSTORE_JOURNAL_JDIR_H
+
+#include <dirent.h>
+#include <string>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ /**
+ * \class jdir
+ * \brief Class to manage the %journal directory
+ */
+ class jdir
+ {
+ private:
+ std::string _dirname;
+ //std::string _base_filename;
+
+ public:
+
+ /**
+ * \brief Sole constructor
+ *
+ * \param dirname Name of directory to be managed.
+ * \param base_filename Filename root used in the creation of %journal files
+ * and sub-directories.
+ */
+ jdir(const std::string& dirname/*, const std::string& base_filename*/);
+
+ virtual ~jdir();
+
+
+ /**
+ * \brief Create %journal directory as set in the dirname parameter of the constructor.
+ * Recursive creation is supported.
+ *
+ * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed.
+ */
+ void create_dir();
+
+ /**
+ * \brief Static function to create a directory. Recursive creation is supported.
+ *
+ * \param dirname C-string containing name of directory.
+ *
+ * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed.
+ */
+ static void create_dir(const char* dirname);
+
+ /**
+ * \brief Static function to create a directory. Recursive creation is supported.
+ *
+ * \param dirname String containing name of directory.
+ *
+ * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed.
+ */
+ static void create_dir(const std::string& dirname);
+
+
+ /**
+ * \brief Clear the %journal directory of files matching the base filename
+ * by moving them into a subdirectory. This fn uses the dirname and base_filename
+ * that were set on construction.
+ *
+ * \param create_flag If set, create dirname if it is non-existent, otherwise throw
+ * exception.
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup
+ * directory failed.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ */
+ void clear_dir(const bool create_flag = true);
+
+ /**
+ * \brief Clear the directory dirname of %journal files matching base_filename
+ * by moving them into a subdirectory.
+ *
+ * \param dirname C-string containing name of %journal directory.
+ * \param base_filename C-string containing base filename of %journal files to be matched
+ * for moving into subdirectory.
+ * \param create_flag If set, create dirname if it is non-existent, otherwise throw
+ * exception
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup
+ * directory failed.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ */
+ static void clear_dir(const char* dirname/*, const char* base_filename*/,
+ const bool create_flag = true);
+
+ /**
+ * \brief Clear the directory dirname of %journal files matching base_filename
+ * by moving them into a subdirectory.
+ *
+ * \param dirname String containing name of %journal directory.
+ * \param base_filename String containing base filename of %journal files to be matched
+ * for moving into subdirectory.
+ * \param create_flag If set, create dirname if it is non-existent, otherwise throw
+ * exception
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup
+ * directory failed.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ */
+ static void clear_dir(const std::string& dirname/*, const std::string& base_filename*/,
+ const bool create_flag = true);
+
+
+
+ /**
+ * \brief Move (push down) the directory target_dir located in directory dirname into a backup directory
+ * named _bak_dir_base.XXXX (note prepended underscore), where XXXX is an increasing hex serial number
+ * starting at 0000.
+ *
+ * \param dirname Full path to directory containing directory to be pushed down.
+ * \param target_dir Name of directory in dirname to be pushed down.
+ * \param bak_dir_base Base name for backup directory to be created in dirname, into which target_dir will be moved.
+ * \return Name of backup dir into which target_dir was pushed.
+ */
+ static std::string push_down(const std::string& dirname, const std::string& target_dir/*, const std::string& bak_dir_base*/);
+
+
+ /**
+ * \brief Verify that dirname is a valid %journal directory.
+ *
+ * The validation reads the .%jinf file, and using this information verifies that all the expected %journal
+ * (.jdat) files are present.
+ *
+ * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname
+ * \exception jerrno::JERR__FILEIO Error reading %jinf file
+ * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file
+ * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing
+ */
+ void verify_dir();
+
+ /**
+ * \brief Verify that dirname is a valid %journal directory.
+ *
+ * The validation reads the .%jinf file, and using this information verifies that all the expected %journal
+ * (.jdat) files are present.
+ *
+ * \param dirname C-string containing name of %journal directory.
+ * \param base_filename C-string containing base filename of %journal files to be matched for moving into sub-directory.
+ *
+ * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname
+ * \exception jerrno::JERR__FILEIO Error reading %jinf file
+ * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file
+ * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing
+ */
+ static void verify_dir(const char* dirname/*, const char* base_filename*/);
+
+ /**
+ * \brief Verify that dirname is a valid %journal directory.
+ *
+ * The validation reads the .%jinf file, and using this information verifies that all the expected %journal
+ * (.jdat) files are present.
+ *
+ * \param dirname String containing name of %journal directory.
+ * \param base_filename String containing base filename of %journal files to be matched for moving into sub-directory.
+ *
+ * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname
+ * \exception jerrno::JERR__FILEIO Error reading %jinf file
+ * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file
+ * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing
+ */
+ static void verify_dir(const std::string& dirname/*, const std::string& base_filename*/);
+
+ /**
+ * \brief Delete the %journal directory and all files and sub--directories that it may
+ * contain. This is equivilent of rm -rf.
+ *
+ * FIXME: links are not handled correctly.
+ *
+ * \param children_only If true, delete only children of dirname, but leave dirname itself.
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname.
+ * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted.
+ * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted.
+ */
+ void delete_dir(bool children_only = false );
+
+ /**
+ * \brief Delete the %journal directory and all files and sub--directories that it may
+ * contain. This is equivilent of rm -rf.
+ *
+ * FIXME: links are not handled correctly.
+ *
+ * \param dirname C-string containing name of directory to be deleted.
+ * \param children_only If true, delete only children of dirname, but leave dirname itself.
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname.
+ * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted.
+ * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted.
+ */
+ static void delete_dir(const char* dirname, bool children_only = false);
+
+ /**
+ * \brief Delete the %journal directory and all files and sub--directories that it may
+ * contain. This is equivilent of rm -rf.
+ *
+ * FIXME: links are not handled correctly.
+ *
+ * \param dirname String containing name of directory to be deleted.
+ * \param children_only If true, delete only children of dirname, but leave dirname itself.
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat dirname.
+ * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted.
+ * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted.
+ */
+ static void delete_dir(const std::string& dirname, bool children_only = false);
+
+ /**
+ * \brief Create bakup directory that is next in sequence and move all %journal files
+ * matching base_filename into it.
+ *
+ * In directory dirname, search for existing backup directory using pattern
+ * "_basename.bak.XXXX" where XXXX is a hexadecimal sequence, and create next directory
+ * based on highest number found. Move all %journal files which match the base_fileaname
+ * parameter into this new backup directory.
+ *
+ * \param dirname String containing name of %journal directory.
+ * \param base_filename String containing base filename of %journal files to be matched
+ * for moving into subdirectory.
+ *
+ * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened.
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ * \exception jerrno::JERR_JDIR_MKDIR The backup directory could not be deleted.
+ */
+ static std::string create_bak_dir(const std::string& dirname/*,
+ const std::string& base_filename*/);
+
+ /**
+ * \brief Return the directory name as a string.
+ */
+ inline const std::string& dirname() const { return _dirname; }
+
+ /**
+ * \brief Return the %journal base filename name as a string.
+ */
+// inline const std::string& base_filename() const { return _base_filename; }
+
+ /**
+ * \brief Test whether the named file is a directory.
+ *
+ * \param name Name of file to be tested.
+ * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b>
+ * otherwise.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat name.
+ */
+ static bool is_dir(const char* name);
+
+ /**
+ * \brief Test whether the named file is a directory.
+ *
+ * \param name Name of file to be tested.
+ * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b>
+ * otherwise.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat name.
+ */
+ static bool is_dir(const std::string& name);
+
+
+ /**
+ * \brief Test whether the named entity exists on the filesystem.
+ *
+ * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. If
+ * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type.
+ * If stat() fails with any other error, an exception is thrown.
+ *
+ * \param name Name of entity to be tested.
+ * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b>
+ * otherwise.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat name.
+ */
+ static bool exists(const char* name);
+
+ /**
+ * \brief Test whether the named entity exists on the filesystem.
+ *
+ * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. If
+ * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type.
+ * If stat() fails with any other error, an exception is thrown.
+ *
+ * \param name Name of entity to be tested.
+ * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b>
+ * otherwise.
+ * \exception jerrno::JERR_JDIR_STAT Could not stat name.
+ */
+ static bool exists(const std::string& name);
+
+ static void read_dir(const std::string& name, std::vector<std::string>& dir_list, const bool incl_dirs, const bool incl_files, const bool incl_links, const bool return_fqfn);
+
+ /**
+ * \brief Stream operator
+ */
+ friend std::ostream& operator<<(std::ostream& os, const jdir& jdir);
+
+ /**
+ * \brief Stream operator
+ */
+ friend std::ostream& operator<<(std::ostream& os, const jdir* jdirPtr);
+
+ private:
+ /**
+ * \brief Check for error, if non-zero close DIR handle and throw JERR_JDIR_READDIR
+ *
+ * \exception jerrno::JERR_JDIR_READDIR Error while reading contents of dir.
+ */
+ static void check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name);
+
+ /**
+ * \brief Close a DIR handle, throw JERR_JDIR_CLOSEDIR if error occurs during close
+ *
+ * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed.
+ */
+ static void close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name);
+
+ static DIR* open_dir(const std::string& dir_name, const std::string& fn_name, const bool test_enoent);
+ };
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_JDIR_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp
new file mode 100644
index 0000000000..ce88e7809c
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp
@@ -0,0 +1,236 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/jerrno.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+std::map<uint32_t, const char*> jerrno::_err_map;
+std::map<uint32_t, const char*>::iterator jerrno::_err_map_itr;
+bool jerrno::_initialized = jerrno::__init();
+
+// generic errors
+const uint32_t jerrno::JERR__MALLOC = 0x0100;
+const uint32_t jerrno::JERR__UNDERFLOW = 0x0101;
+const uint32_t jerrno::JERR__NINIT = 0x0102;
+const uint32_t jerrno::JERR__AIO = 0x0103;
+const uint32_t jerrno::JERR__FILEIO = 0x0104;
+const uint32_t jerrno::JERR__RTCLOCK = 0x0105;
+const uint32_t jerrno::JERR__PTHREAD = 0x0106;
+const uint32_t jerrno::JERR__TIMEOUT = 0x0107;
+const uint32_t jerrno::JERR__UNEXPRESPONSE = 0x0108;
+const uint32_t jerrno::JERR__RECNFOUND = 0x0109;
+const uint32_t jerrno::JERR__NOTIMPL = 0x010a;
+const uint32_t jerrno::JERR__NULL = 0x010b;
+const uint32_t jerrno::JERR__SYMLINK = 0x010c;
+
+// class jcntl
+const uint32_t jerrno::JERR_JCNTL_STOPPED = 0x0200;
+const uint32_t jerrno::JERR_JCNTL_READONLY = 0x0201;
+const uint32_t jerrno::JERR_JCNTL_AIOCMPLWAIT = 0x0202;
+const uint32_t jerrno::JERR_JCNTL_UNKNOWNMAGIC = 0x0203;
+const uint32_t jerrno::JERR_JCNTL_NOTRECOVERED = 0x0204;
+const uint32_t jerrno::JERR_JCNTL_ENQSTATE = 0x0207;
+const uint32_t jerrno::JERR_JCNTL_INVALIDENQHDR = 0x0208;
+
+// class jdir
+const uint32_t jerrno::JERR_JDIR_NOTDIR = 0x0300;
+const uint32_t jerrno::JERR_JDIR_MKDIR = 0x0301;
+const uint32_t jerrno::JERR_JDIR_OPENDIR = 0x0302;
+const uint32_t jerrno::JERR_JDIR_READDIR = 0x0303;
+const uint32_t jerrno::JERR_JDIR_CLOSEDIR = 0x0304;
+const uint32_t jerrno::JERR_JDIR_RMDIR = 0x0305;
+const uint32_t jerrno::JERR_JDIR_NOSUCHFILE = 0x0306;
+const uint32_t jerrno::JERR_JDIR_FMOVE = 0x0307;
+const uint32_t jerrno::JERR_JDIR_STAT = 0x0308;
+const uint32_t jerrno::JERR_JDIR_UNLINK = 0x0309;
+const uint32_t jerrno::JERR_JDIR_BADFTYPE = 0x030a;
+
+// class JournalFile
+const uint32_t jerrno::JERR_JNLF_OPEN = 0x0400;
+const uint32_t jerrno::JERR_JNLF_CLOSE = 0x0401;
+const uint32_t jerrno::JERR_JNLF_FILEOFFSOVFL = 0x0402;
+const uint32_t jerrno::JERR_JNLF_CMPLOFFSOVFL = 0x0403;
+
+// class LinearFileController
+const uint32_t jerrno::JERR_LFCR_SEQNUMNOTFOUND = 0x0500;
+
+// class jrec, enq_rec, deq_rec, txn_rec
+const uint32_t jerrno::JERR_JREC_BADRECHDR = 0x0700;
+const uint32_t jerrno::JERR_JREC_BADRECTAIL = 0x0701;
+
+// class wmgr
+const uint32_t jerrno::JERR_WMGR_BADPGSTATE = 0x0801;
+const uint32_t jerrno::JERR_WMGR_BADDTOKSTATE = 0x0802;
+const uint32_t jerrno::JERR_WMGR_ENQDISCONT = 0x0803;
+const uint32_t jerrno::JERR_WMGR_DEQDISCONT = 0x0804;
+const uint32_t jerrno::JERR_WMGR_DEQRIDNOTENQ = 0x0805;
+const uint32_t jerrno::JERR_WMGR_BADFH = 0x0806;
+const uint32_t jerrno::JERR_WMGR_NOTSBLKALIGNED = 0x0807;
+
+// class RecoveryManager
+const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900;
+const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901;
+const uint32_t jerrno::JERR_RCVM_READ = 0x0902;
+const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903;
+const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904;
+const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905;
+const uint32_t jerrno::JERR_RCVM_NULLFID = 0x0907;
+const uint32_t jerrno::JERR_RCVM_INVALIDEFPID = 0x0908;
+
+// class data_tok
+const uint32_t jerrno::JERR_DTOK_ILLEGALSTATE = 0x0a00;
+// const uint32_t jerrno::JERR_DTOK_RIDNOTSET = 0x0a01;
+
+// class enq_map, txn_map
+const uint32_t jerrno::JERR_MAP_DUPLICATE = 0x0b00;
+const uint32_t jerrno::JERR_MAP_NOTFOUND = 0x0b01;
+const uint32_t jerrno::JERR_MAP_LOCKED = 0x0b02;
+
+// EFP errors
+const uint32_t jerrno::JERR_EFP_BADPARTITIONNAME = 0x0d01;
+const uint32_t jerrno::JERR_EFP_BADPARTITIONDIR = 0x0d02;
+const uint32_t jerrno::JERR_EFP_BADEFPDIRNAME = 0x0d03;
+const uint32_t jerrno::JERR_EFP_NOEFP = 0x0d04;
+const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05;
+const uint32_t jerrno::JERR_EFP_LSTAT = 0x0d06;
+const uint32_t jerrno::JERR_EFP_BADFILETYPE = 0x0d07;
+const uint32_t jerrno::JERR_EFP_FOPEN = 0x0d08;
+const uint32_t jerrno::JERR_EFP_FWRITE = 0x0d09;
+const uint32_t jerrno::JERR_EFP_MKDIR = 0x0d0a;
+
+// Negative returns for some functions
+const int32_t jerrno::AIO_TIMEOUT = -1;
+const int32_t jerrno::LOCK_TAKEN = -2;
+
+
+// static initialization fn
+
+bool
+jerrno::__init()
+{
+ // generic errors
+ _err_map[JERR__MALLOC] = "JERR__MALLOC: Buffer memory allocation failed.";
+ _err_map[JERR__UNDERFLOW] = "JERR__UNDERFLOW: Underflow error";
+ _err_map[JERR__NINIT] = "JERR__NINIT: Operation on uninitialized class.";
+ _err_map[JERR__AIO] = "JERR__AIO: AIO error.";
+ _err_map[JERR__FILEIO] = "JERR__FILEIO: File read or write failure.";
+ _err_map[JERR__RTCLOCK] = "JERR__RTCLOCK: Reading real-time clock failed.";
+ _err_map[JERR__PTHREAD] = "JERR__PTHREAD: pthread failure.";
+ _err_map[JERR__TIMEOUT] = "JERR__TIMEOUT: Timeout waiting for event.";
+ _err_map[JERR__UNEXPRESPONSE] = "JERR__UNEXPRESPONSE: Unexpected response to call or event.";
+ _err_map[JERR__RECNFOUND] = "JERR__RECNFOUND: Record not found.";
+ _err_map[JERR__NOTIMPL] = "JERR__NOTIMPL: Not implemented";
+ _err_map[JERR__NULL] = "JERR__NULL: Operation on null pointer";
+ _err_map[JERR__SYMLINK] = "JERR__SYMLINK: Symbolic link operation failed";
+
+ // class jcntl
+ _err_map[JERR_JCNTL_STOPPED] = "JERR_JCNTL_STOPPED: Operation on stopped journal.";
+ _err_map[JERR_JCNTL_READONLY] = "JERR_JCNTL_READONLY: Write operation on read-only journal (during recovery).";
+ _err_map[JERR_JCNTL_AIOCMPLWAIT] = "JERR_JCNTL_AIOCMPLWAIT: Timeout waiting for AIOs to complete.";
+ _err_map[JERR_JCNTL_UNKNOWNMAGIC] = "JERR_JCNTL_UNKNOWNMAGIC: Found record with unknown magic.";
+ _err_map[JERR_JCNTL_NOTRECOVERED] = "JERR_JCNTL_NOTRECOVERED: Operation requires recover() to be run first.";
+ _err_map[JERR_JCNTL_ENQSTATE] = "JERR_JCNTL_ENQSTATE: Read error: Record not in ENQ state";
+ _err_map[JERR_JCNTL_INVALIDENQHDR] = "JERR_JCNTL_INVALIDENQHDR: Invalid ENQ header";
+
+ // class jdir
+ _err_map[JERR_JDIR_NOTDIR] = "JERR_JDIR_NOTDIR: Directory name exists but is not a directory.";
+ _err_map[JERR_JDIR_MKDIR] = "JERR_JDIR_MKDIR: Directory creation failed.";
+ _err_map[JERR_JDIR_OPENDIR] = "JERR_JDIR_OPENDIR: Directory open failed.";
+ _err_map[JERR_JDIR_READDIR] = "JERR_JDIR_READDIR: Directory read failed.";
+ _err_map[JERR_JDIR_CLOSEDIR] = "JERR_JDIR_CLOSEDIR: Directory close failed.";
+ _err_map[JERR_JDIR_RMDIR] = "JERR_JDIR_RMDIR: Directory delete failed.";
+ _err_map[JERR_JDIR_NOSUCHFILE] = "JERR_JDIR_NOSUCHFILE: File does not exist.";
+ _err_map[JERR_JDIR_FMOVE] = "JERR_JDIR_FMOVE: File move failed.";
+ _err_map[JERR_JDIR_STAT] = "JERR_JDIR_STAT: File stat failed.";
+ _err_map[JERR_JDIR_UNLINK] = "JERR_JDIR_UNLINK: File delete failed.";
+ _err_map[JERR_JDIR_BADFTYPE] = "JERR_JDIR_BADFTYPE: Bad or unknown file type (stat mode).";
+
+ // class JournalFile
+ _err_map[JERR_JNLF_OPEN] = "JERR_JNLF_OPEN: Unable to open file for write";
+ _err_map[JERR_JNLF_CLOSE] = "JERR_JNLF_CLOSE: Unable to close file";
+ _err_map[JERR_JNLF_FILEOFFSOVFL] = "JERR_JNLF_FILEOFFSOVFL: Attempted to increase submitted offset past file size.";
+ _err_map[JERR_JNLF_CMPLOFFSOVFL] = "JERR_JNLF_CMPLOFFSOVFL: Attempted to increase completed file offset past submitted offset.";
+
+ // class LinearFileController
+ _err_map[JERR_LFCR_SEQNUMNOTFOUND] = "JERR_LFCR_SEQNUMNOTFOUND: File sequence number not found";
+
+ // class jrec, enq_rec, deq_rec, txn_rec
+ _err_map[JERR_JREC_BADRECHDR] = "JERR_JREC_BADRECHDR: Invalid record header.";
+ _err_map[JERR_JREC_BADRECTAIL] = "JERR_JREC_BADRECTAIL: Invalid record tail.";
+
+ // class wmgr
+ _err_map[JERR_WMGR_BADPGSTATE] = "JERR_WMGR_BADPGSTATE: Page buffer in illegal state for operation.";
+ _err_map[JERR_WMGR_BADDTOKSTATE] = "JERR_WMGR_BADDTOKSTATE: Data token in illegal state for operation.";
+ _err_map[JERR_WMGR_ENQDISCONT] = "JERR_WMGR_ENQDISCONT: Enqueued new dtok when previous enqueue returned partly completed (state ENQ_PART).";
+ _err_map[JERR_WMGR_DEQDISCONT] = "JERR_WMGR_DEQDISCONT: Dequeued new dtok when previous dequeue returned partly completed (state DEQ_PART).";
+ _err_map[JERR_WMGR_DEQRIDNOTENQ] = "JERR_WMGR_DEQRIDNOTENQ: Dequeue rid is not enqueued.";
+ _err_map[JERR_WMGR_BADFH] = "JERR_WMGR_BADFH: Bad file handle.";
+ _err_map[JERR_WMGR_NOTSBLKALIGNED] = "JERR_WMGR_NOTSBLKALIGNED: Offset is not soft block (sblk)-aligned";
+
+ // class RecoveryManager
+ _err_map[JERR_RCVM_OPENRD] = "JERR_RCVM_OPENRD: Unable to open file for read";
+ _err_map[JERR_RCVM_STREAMBAD] = "JERR_RCVM_STREAMBAD: Read/write stream error";
+ _err_map[JERR_RCVM_READ] = "JERR_RCVM_READ: Read error: no or insufficient data to read";
+ _err_map[JERR_RCVM_WRITE] = "JERR_RCVM_WRITE: Write error";
+ _err_map[JERR_RCVM_NULLXID] = "JERR_RCVM_NULLXID: Null XID when XID length non-null in header";
+ _err_map[JERR_RCVM_NOTDBLKALIGNED] = "JERR_RCVM_NOTDBLKALIGNED: Offset is not data block (dblk)-aligned";
+ _err_map[JERR_RCVM_NULLFID] = "JERR_RCVM_NULLFID: Null file id (FID)";
+ _err_map[JERR_RCVM_INVALIDEFPID] = "JERR_RCVM_INVALIDEFPID: Invalid EFP identity (partition/size)";
+
+ // class data_tok
+ _err_map[JERR_DTOK_ILLEGALSTATE] = "JERR_MTOK_ILLEGALSTATE: Attempted to change to illegal state.";
+ //_err_map[JERR_DTOK_RIDNOTSET] = "JERR_DTOK_RIDNOTSET: Record ID not set.";
+
+ // class enq_map, txn_map
+ _err_map[JERR_MAP_DUPLICATE] = "JERR_MAP_DUPLICATE: Attempted to insert record into map using duplicate key.";
+ _err_map[JERR_MAP_NOTFOUND] = "JERR_MAP_NOTFOUND: Key not found in map.";
+ _err_map[JERR_MAP_LOCKED] = "JERR_MAP_LOCKED: Record ID locked by a pending transaction.";
+
+ // EFP errors
+ _err_map[JERR_EFP_BADPARTITIONNAME] = "JERR_EFP_BADPARTITIONNAME: Invalid partition name (must be \'pNNN\' where NNN is a non-zero number)";
+ _err_map[JERR_EFP_BADEFPDIRNAME] = "JERR_EFP_BADEFPDIRNAME: Bad Empty File Pool directory name (must be \'NNNk\', where NNN is a number which is a multiple of 4)";
+ _err_map[JERR_EFP_BADPARTITIONDIR] = "JERR_EFP_BADPARTITIONDIR: Invalid partition directory";
+ _err_map[JERR_EFP_NOEFP] = "JERR_EFP_NOEFP: No Empty File Pool found for given partition and empty file size";
+ _err_map[JERR_EFP_EMPTY] = "JERR_EFP_EMPTY: Empty File Pool is empty";
+ _err_map[JERR_EFP_LSTAT] = "JERR_EFP_LSTAT: lstat() operation failed";
+ _err_map[JERR_EFP_BADFILETYPE] = "JERR_EFP_BADFILETYPE: File type incorrect for operation";
+ _err_map[JERR_EFP_FOPEN] = "JERR_EFP_FOPEN: Unable to fopen file for write";
+ _err_map[JERR_EFP_FWRITE] = "JERR_EFP_FWRITE: Write failed";
+ _err_map[JERR_EFP_MKDIR] = "JERR_EFP_MKDIR: Directory creation failed";
+
+ //_err_map[] = "";
+
+ return true;
+}
+
+const char*
+jerrno::err_msg(const uint32_t err_no) throw ()
+{
+ _err_map_itr = _err_map.find(err_no);
+ if (_err_map_itr == _err_map.end())
+ return "<Unknown error code>";
+ return _err_map_itr->second;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h
new file mode 100644
index 0000000000..6e817682ca
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h
@@ -0,0 +1,157 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JERRNO_H
+#define QPID_LINEARSTORE_JOURNAL_JERRNO_H
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+class jerrno;
+}}}
+
+#include <map>
+#include <stdint.h>
+#include <string>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ /**
+ * \class jerrno
+ * \brief Class containing static error definitions and static map for error messages.
+ */
+ class jerrno
+ {
+ static std::map<uint32_t, const char*> _err_map; ///< Map of error messages
+ static std::map<uint32_t, const char*>::iterator _err_map_itr; ///< Iterator
+ static bool _initialized; ///< Dummy flag, used to initialise map.
+
+ public:
+ // generic errors
+ static const uint32_t JERR__MALLOC; ///< Buffer memory allocation failed
+ static const uint32_t JERR__UNDERFLOW; ///< Underflow error
+ static const uint32_t JERR__NINIT; ///< Operation on uninitialized class
+ static const uint32_t JERR__AIO; ///< AIO failure
+ static const uint32_t JERR__FILEIO; ///< File read or write failure
+ static const uint32_t JERR__RTCLOCK; ///< Reading real-time clock failed
+ static const uint32_t JERR__PTHREAD; ///< pthread failure
+ static const uint32_t JERR__TIMEOUT; ///< Timeout waiting for an event
+ static const uint32_t JERR__UNEXPRESPONSE; ///< Unexpected response to call or event
+ static const uint32_t JERR__RECNFOUND; ///< Record not found
+ static const uint32_t JERR__NOTIMPL; ///< Not implemented
+ static const uint32_t JERR__NULL; ///< Operation on null pointer
+ static const uint32_t JERR__SYMLINK; ///< Symbolic Link operation failed
+
+ // class jcntl
+ static const uint32_t JERR_JCNTL_STOPPED; ///< Operation on stopped journal
+ static const uint32_t JERR_JCNTL_READONLY; ///< Write operation on read-only journal
+ static const uint32_t JERR_JCNTL_AIOCMPLWAIT; ///< Timeout waiting for AIOs to complete
+ static const uint32_t JERR_JCNTL_UNKNOWNMAGIC; ///< Found record with unknown magic
+ static const uint32_t JERR_JCNTL_NOTRECOVERED; ///< Req' recover() to be called first
+ static const uint32_t JERR_JCNTL_ENQSTATE; ///< Read error: Record not in ENQ state
+ static const uint32_t JERR_JCNTL_INVALIDENQHDR; ///< Invalid ENQ header
+
+ // class jdir
+ static const uint32_t JERR_JDIR_NOTDIR; ///< Exists but is not a directory
+ static const uint32_t JERR_JDIR_MKDIR; ///< Directory creation failed
+ static const uint32_t JERR_JDIR_OPENDIR; ///< Directory open failed
+ static const uint32_t JERR_JDIR_READDIR; ///< Directory read failed
+ static const uint32_t JERR_JDIR_CLOSEDIR; ///< Directory close failed
+ static const uint32_t JERR_JDIR_RMDIR; ///< Directory delete failed
+ static const uint32_t JERR_JDIR_NOSUCHFILE; ///< File does not exist
+ static const uint32_t JERR_JDIR_FMOVE; ///< File move failed
+ static const uint32_t JERR_JDIR_STAT; ///< File stat failed
+ static const uint32_t JERR_JDIR_UNLINK; ///< File delete failed
+ static const uint32_t JERR_JDIR_BADFTYPE; ///< Bad or unknown file type (stat mode)
+
+ // class JournalFile
+ static const uint32_t JERR_JNLF_OPEN; ///< Unable to open file for write
+ static const uint32_t JERR_JNLF_CLOSE; ///< Unable to close file
+ static const uint32_t JERR_JNLF_FILEOFFSOVFL; ///< Increased offset past file size
+ static const uint32_t JERR_JNLF_CMPLOFFSOVFL; ///< Increased cmpl offs past subm offs
+
+ // class LinearFileController
+ static const uint32_t JERR_LFCR_SEQNUMNOTFOUND; ///< File sequence number not found
+
+ // class jrec, enq_rec, deq_rec, txn_rec
+ static const uint32_t JERR_JREC_BADRECHDR; ///< Invalid data record header
+ static const uint32_t JERR_JREC_BADRECTAIL; ///< Invalid data record tail
+
+ // class wmgr
+ static const uint32_t JERR_WMGR_BADPGSTATE; ///< Page buffer in illegal state.
+ static const uint32_t JERR_WMGR_BADDTOKSTATE; ///< Data token in illegal state.
+ static const uint32_t JERR_WMGR_ENQDISCONT; ///< Enq. new dtok when previous part compl.
+ static const uint32_t JERR_WMGR_DEQDISCONT; ///< Deq. new dtok when previous part compl.
+ static const uint32_t JERR_WMGR_DEQRIDNOTENQ; ///< Deq. rid not enqueued
+ static const uint32_t JERR_WMGR_BADFH; ///< Bad file handle
+ static const uint32_t JERR_WMGR_NOTSBLKALIGNED; ///< Offset is not soft block (sblk)-aligned
+
+ // class RecoveryManager
+ static const uint32_t JERR_RCVM_OPENRD; ///< Unable to open file for read
+ static const uint32_t JERR_RCVM_STREAMBAD; ///< Read/write stream error
+ static const uint32_t JERR_RCVM_READ; ///< Read error: no or insufficient data to read
+ static const uint32_t JERR_RCVM_WRITE; ///< Write error
+ static const uint32_t JERR_RCVM_NULLXID; ///< Null XID when XID length non-null in header
+ static const uint32_t JERR_RCVM_NOTDBLKALIGNED; ///< Offset is not data block (dblk)-aligned
+ static const uint32_t JERR_RCVM_NULLFID; ///< Null file ID (FID)
+ static const uint32_t JERR_RCVM_INVALIDEFPID; ///< Invalid EFP identity (partition/size)
+
+ // class data_tok
+ static const uint32_t JERR_DTOK_ILLEGALSTATE; ///< Attempted to change to illegal state
+// static const uint32_t JERR_DTOK_RIDNOTSET; ///< Record ID not set
+
+ // class enq_map, txn_map
+ static const uint32_t JERR_MAP_DUPLICATE; ///< Attempted to insert using duplicate key
+ static const uint32_t JERR_MAP_NOTFOUND; ///< Key not found in map
+ static const uint32_t JERR_MAP_LOCKED; ///< rid locked by pending txn
+
+ // EFP errors
+ static const uint32_t JERR_EFP_BADPARTITIONNAME; ///< Partition name invalid or of value 0
+ static const uint32_t JERR_EFP_BADEFPDIRNAME; ///< Empty File Pool directory name invalid
+ static const uint32_t JERR_EFP_BADPARTITIONDIR; ///< Invalid partition directory
+ static const uint32_t JERR_EFP_NOEFP; ///< No EFP found for given partition and file size
+ static const uint32_t JERR_EFP_EMPTY; ///< EFP empty
+ static const uint32_t JERR_EFP_LSTAT; ///< lstat operation failed
+ static const uint32_t JERR_EFP_BADFILETYPE; ///< Bad file type
+ static const uint32_t JERR_EFP_FOPEN; ///< Unable to fopen file for write
+ static const uint32_t JERR_EFP_FWRITE; ///< Write failed
+ static const uint32_t JERR_EFP_MKDIR; ///< Directory creation failed
+
+ // Negative returns for some functions
+ static const int32_t AIO_TIMEOUT; ///< Timeout waiting for AIO return
+ static const int32_t LOCK_TAKEN; ///< Attempted to take lock, but it was taken by another thread
+ /**
+ * \brief Method to access error message from known error number.
+ */
+ static const char* err_msg(const uint32_t err_no) throw ();
+
+ private:
+ /**
+ * \brief Static function to initialize map.
+ */
+ static bool __init();
+ };
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_JERRNO_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jexception.cpp b/qpid/cpp/src/qpid/linearstore/journal/jexception.cpp
new file mode 100644
index 0000000000..49f486746a
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jexception.cpp
@@ -0,0 +1,168 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/jexception.h"
+
+#include <iomanip>
+
+#define CATLEN(p) MAX_MSG_SIZE - std::strlen(p) - 1
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+jexception::jexception() throw ():
+ std::exception(),
+ _err_code(0)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code) throw ():
+ std::exception(),
+ _err_code(err_code)
+{
+ format();
+}
+
+jexception::jexception(const char* additional_info) throw ():
+ std::exception(),
+ _err_code(0),
+ _additional_info(additional_info)
+{
+ format();
+}
+
+jexception::jexception(const std::string& additional_info) throw ():
+ std::exception(),
+ _err_code(0),
+ _additional_info(additional_info)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const char* additional_info) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _additional_info(additional_info)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const std::string& additional_info) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _additional_info(additional_info)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const char* throwing_class,
+ const char* throwing_fn) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _throwing_class(throwing_class),
+ _throwing_fn(throwing_fn)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const std::string& throwing_class,
+ const std::string& throwing_fn) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _throwing_class(throwing_class),
+ _throwing_fn(throwing_fn)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const char* additional_info,
+ const char* throwing_class, const char* throwing_fn) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _additional_info(additional_info),
+ _throwing_class(throwing_class),
+ _throwing_fn(throwing_fn)
+{
+ format();
+}
+
+jexception::jexception(const uint32_t err_code, const std::string& additional_info,
+ const std::string& throwing_class, const std::string& throwing_fn) throw ():
+ std::exception(),
+ _err_code(err_code),
+ _additional_info(additional_info),
+ _throwing_class(throwing_class),
+ _throwing_fn(throwing_fn)
+{
+ format();
+}
+
+jexception::~jexception() throw ()
+{}
+
+void
+jexception::format()
+{
+ const bool ai = !_additional_info.empty();
+ const bool tc = !_throwing_class.empty();
+ const bool tf = !_throwing_fn.empty();
+ std::ostringstream oss;
+ oss << "jexception 0x" << std::hex << std::setfill('0') << std::setw(4) << _err_code << " ";
+ if (tc)
+ {
+ oss << _throwing_class;
+ if (tf)
+ oss << "::";
+ else
+ oss << " ";
+ }
+ if (tf)
+ oss << _throwing_fn << "() ";
+ if (tc || tf)
+ oss << "threw " << jerrno::err_msg(_err_code);
+ if (ai)
+ oss << " (" << _additional_info << ")";
+ _what.assign(oss.str());
+}
+
+const char*
+jexception::what() const throw ()
+{
+ return _what.c_str();
+}
+
+std::ostream&
+operator<<(std::ostream& os, const jexception& je)
+{
+ os << je.what();
+ return os;
+}
+
+std::ostream&
+operator<<(std::ostream& os, const jexception* jePtr)
+{
+ os << jePtr->what();
+ return os;
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jexception.h b/qpid/cpp/src/qpid/linearstore/journal/jexception.h
new file mode 100644
index 0000000000..d03ee32e3f
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jexception.h
@@ -0,0 +1,125 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JEXCEPTION_H
+#define QPID_LINEARSTORE_JOURNAL_JEXCEPTION_H
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+class jexception;
+}}}
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include "qpid/linearstore/journal/jerrno.h"
+#include <sstream>
+#include <string>
+
+// Macro for formatting commom system errors
+#define FORMAT_SYSERR(errno) " errno=" << errno << " (" << std::strerror(errno) << ")"
+
+#define MALLOC_CHK(ptr, var, cls, fn) if(ptr == 0) { \
+ clean(); \
+ std::ostringstream oss; \
+ oss << var << ": malloc() failed: " << FORMAT_SYSERR(errno); \
+ throw jexception(jerrno::JERR__MALLOC, oss.str(), cls, fn); \
+ }
+
+// TODO: The following is a temporary bug-tracking aid which forces a core.
+// Replace with the commented out version below when BZ484048 is resolved.
+#define PTHREAD_CHK(err, pfn, cls, fn) if(err != 0) { \
+ std::ostringstream oss; \
+ oss << cls << "::" << fn << "(): " << pfn; \
+ errno = err; \
+ ::perror(oss.str().c_str()); \
+ ::abort(); \
+ }
+/*
+#define PTHREAD_CHK(err, pfn, cls, fn) if(err != 0) { \
+ std::ostringstream oss; \
+ oss << pfn << " failed: " << FORMAT_SYSERR(err); \
+ throw jexception(jerrno::JERR__PTHREAD, oss.str(), cls, fn); \
+ }
+*/
+
+#define ASSERT(cond, msg) if(cond == 0) { \
+ std::cerr << msg << std::endl; \
+ ::abort(); \
+ }
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ /**
+ * \class jexception
+ * \brief Generic journal exception class
+ */
+ class jexception : public std::exception
+ {
+ private:
+ uint32_t _err_code;
+ std::string _additional_info;
+ std::string _throwing_class;
+ std::string _throwing_fn;
+ std::string _what;
+ void format();
+
+ public:
+ jexception() throw ();
+
+ jexception(const uint32_t err_code) throw ();
+
+ jexception(const char* additional_info) throw ();
+ jexception(const std::string& additional_info) throw ();
+
+ jexception(const uint32_t err_code, const char* additional_info) throw ();
+ jexception(const uint32_t err_code, const std::string& additional_info) throw ();
+
+ jexception(const uint32_t err_code, const char* throwing_class, const char* throwing_fn)
+ throw ();
+ jexception(const uint32_t err_code, const std::string& throwing_class,
+ const std::string& throwing_fn) throw ();
+
+ jexception(const uint32_t err_code, const char* additional_info,
+ const char* throwing_class, const char* throwing_fn) throw ();
+ jexception(const uint32_t err_code, const std::string& additional_info,
+ const std::string& throwing_class, const std::string& throwing_fn) throw ();
+
+ virtual ~jexception() throw ();
+ virtual const char* what() const throw (); // override std::exception::what()
+
+ inline uint32_t err_code() const throw () { return _err_code; }
+ inline const std::string additional_info() const throw () { return _additional_info; }
+ inline const std::string throwing_class() const throw () { return _throwing_class; }
+ inline const std::string throwing_fn() const throw () { return _throwing_fn; }
+
+ friend std::ostream& operator<<(std::ostream& os, const jexception& je);
+ friend std::ostream& operator<<(std::ostream& os, const jexception* jePtr);
+ }; // class jexception
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_JEXCEPTION_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jrec.h b/qpid/cpp/src/qpid/linearstore/journal/jrec.h
new file mode 100644
index 0000000000..cad0e5d7a2
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/jrec.h
@@ -0,0 +1,122 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_JREC_H
+#define QPID_LINEARSTORE_JOURNAL_JREC_H
+
+#include <fstream>
+#include "qpid/linearstore/journal/jcfg.h"
+#include <stdint.h>
+
+struct rec_hdr_t;
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class Checksum;
+
+/**
+* \class jrec
+* \brief Abstract class for all file jrecords, both data and log. This class establishes
+* the common data format and structure for these jrecords.
+*/
+class jrec
+{
+public:
+ jrec() {}
+ virtual ~jrec() {}
+
+ /**
+ * \brief Encode this instance of jrec into the write buffer at the disk-block-aligned
+ * pointer wptr starting at position rec_offs_dblks in the encoded record to a
+ * maximum size of max_size_dblks.
+ *
+ * This call encodes the content of the data contianed in this instance of jrec into a
+ * disk-softblock-aligned (defined by JRNL_SBLK_SIZE) buffer pointed to by parameter
+ * wptr. No more than paramter max_size_dblks data-blocks may be written to the buffer.
+ * The parameter rec_offs_dblks is the offset in data-blocks within the fully encoded
+ * data block this instance represents at which to start encoding.
+ *
+ * Encoding entails writing the record header (struct enq_hdr), the data and the record tail
+ * (struct enq_tail). The record must be data-block-aligned (defined by JRNL_DBLK_SIZE),
+ * thus any remaining space in the final data-block is ignored; the returned value is the
+ * number of data-blocks consumed from the page by the encode action. Provided the initial
+ * alignment requirements are met, records may be of arbitrary size and may span multiple
+ * data-blocks, disk-blocks and/or pages.
+ *
+ * Since the record size in data-blocks is known, the general usage pattern is to call
+ * encode() as many times as is needed to fully encode the data. Each call to encode()
+ * will encode as much of the record as it can to what remains of the current page cache,
+ * and will return the number of data-blocks actually encoded.
+ *
+ * <b>Example:</b> Assume that record r1 was previously written to page 0, and that this
+ * is an instance representing record r2. Being larger than the page size ps, r2 would span
+ * multiple pages as follows:
+ * <pre>
+ * |<---ps--->|
+ * +----------+----------+----------+----...
+ * | |r2a| r2b | r2c | |
+ * |<-r1-><----------r2----------> |
+ * +----------+----------+----------+----...
+ * page: p0 p1 p2
+ * </pre>
+ * Encoding record r2 will require multiple calls to encode; one for each page which
+ * is involved. Record r2 is divided logically into sections r2a, r2b and r2c at the
+ * points where the page boundaries intersect with the record. Assuming a page size
+ * of ps, the page boundary pointers are represented by their names p0, p1... and the
+ * sizes of the record segments are represented by their names r1, r2a, r2b..., the calls
+ * should be as follows:
+ * <pre>
+ * encode(p0+r1, 0, ps-r1); (returns r2a data-blocks)
+ * encode(p1, r2a, ps); (returns r2b data-blocks which equals ps)
+ * encode(p2, r2a+r2b, ps); (returns r2c data-blocks)
+ * </pre>
+ *
+ * \param wptr Data-block-aligned pointer to position in page buffer where encoding is to
+ * take place.
+ * \param rec_offs_dblks Offset in data-blocks within record from which to start encoding.
+ * \param max_size_dblks Maximum number of data-blocks to write to pointer wptr.
+ * \returns Number of data-blocks encoded.
+ */
+ virtual uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum) = 0;
+ virtual bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start) = 0;
+
+ virtual std::string& str(std::string& str) const = 0;
+ virtual std::size_t data_size() const = 0;
+ virtual std::size_t xid_size() const = 0;
+ virtual std::size_t rec_size() const = 0;
+ inline virtual uint32_t rec_size_dblks() const { return size_dblks(rec_size()); }
+ static inline uint32_t size_dblks(const std::size_t size)
+ { return size_blks(size, QLS_DBLK_SIZE_BYTES); }
+ static inline uint32_t size_sblks(const std::size_t size)
+ { return size_blks(size, QLS_SBLK_SIZE_BYTES); }
+ static inline uint32_t size_blks(const std::size_t size, const std::size_t blksize)
+ { return (size + blksize - 1)/blksize; }
+ virtual uint64_t rid() const = 0;
+
+protected:
+ virtual void clean() = 0;
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JRNL_JREC_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp b/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp
new file mode 100644
index 0000000000..764beaa879
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp
@@ -0,0 +1,192 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/pmgr.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+pmgr::page_cb::page_cb(uint16_t index):
+ _index(index),
+ _state(UNUSED),
+ _frid(0),
+ _wdblks(0),
+ _pdtokl(0),
+ _jfp(0),
+ _pbuff(0)
+{}
+
+// TODO: almost identical to pmgr::page_state_str() below - resolve
+const char*
+pmgr::page_cb::state_str() const
+{
+ switch(_state)
+ {
+ case UNUSED:
+ return "UNUSED";
+ case IN_USE:
+ return "IN_USE";
+ case AIO_PENDING:
+ return "AIO_PENDING";
+ }
+ return "<unknown>";
+}
+
+// static
+const uint32_t pmgr::_sblkSizeBytes = QLS_SBLK_SIZE_BYTES;
+
+pmgr::pmgr(jcntl* jc, enq_map& emap, txn_map& tmap):
+ _cache_pgsize_sblks(0),
+ _cache_num_pages(0),
+ _jc(jc),
+ _emap(emap),
+ _tmap(tmap),
+ _page_base_ptr(0),
+ _page_ptr_arr(0),
+ _page_cb_arr(0),
+ _aio_cb_arr(0),
+ _aio_event_arr(0),
+ _ioctx(0),
+ _pg_index(0),
+ _pg_cntr(0),
+ _pg_offset_dblks(0),
+ _aio_evt_rem(0),
+ _cbp(0),
+ _enq_rec(),
+ _deq_rec(),
+ _txn_rec()
+{}
+
+pmgr::~pmgr()
+{
+ pmgr::clean();
+}
+
+void
+pmgr::initialize(aio_callback* const cbp, const uint32_t cache_pgsize_sblks, const uint16_t cache_num_pages)
+{
+ // As static use of this class keeps old values around, clean up first...
+ pmgr::clean();
+ _pg_index = 0;
+ _pg_cntr = 0;
+ _pg_offset_dblks = 0;
+ _aio_evt_rem = 0;
+ _cache_pgsize_sblks = cache_pgsize_sblks;
+ _cache_num_pages = cache_num_pages;
+ _cbp = cbp;
+
+ // 1. Allocate page memory (as a single block)
+ std::size_t cache_pgsize = _cache_num_pages * _cache_pgsize_sblks * _sblkSizeBytes;
+ if (::posix_memalign(&_page_base_ptr, QLS_AIO_ALIGN_BOUNDARY_BYTES, cache_pgsize))
+ {
+ clean();
+ std::ostringstream oss;
+ oss << "posix_memalign(): alignment=" << QLS_AIO_ALIGN_BOUNDARY_BYTES << " size=" << cache_pgsize;
+ oss << FORMAT_SYSERR(errno);
+ throw jexception(jerrno::JERR__MALLOC, oss.str(), "pmgr", "initialize");
+ }
+
+ // 2. Allocate array of page pointers
+ _page_ptr_arr = (void**)std::malloc(_cache_num_pages * sizeof(void*));
+ MALLOC_CHK(_page_ptr_arr, "_page_ptr_arr", "pmgr", "initialize");
+
+ // 3. Allocate and initialize page control block (page_cb) array
+ _page_cb_arr = (page_cb*)std::malloc(_cache_num_pages * sizeof(page_cb));
+ MALLOC_CHK(_page_cb_arr, "_page_cb_arr", "pmgr", "initialize");
+ std::memset(_page_cb_arr, 0, _cache_num_pages * sizeof(page_cb));
+
+ // 4. Allocate IO control block (iocb) array
+ _aio_cb_arr = (aio_cb*)std::malloc(_cache_num_pages * sizeof(aio_cb));
+ MALLOC_CHK(_aio_cb_arr, "_aio_cb_arr", "pmgr", "initialize");
+
+ // 5. Set page pointers in _page_ptr_arr, _page_cb_arr and iocbs to pages within page block
+ for (uint16_t i=0; i<_cache_num_pages; i++)
+ {
+ _page_ptr_arr[i] = (void*)((char*)_page_base_ptr + _cache_pgsize_sblks * _sblkSizeBytes * i);
+ _page_cb_arr[i]._index = i;
+ _page_cb_arr[i]._state = UNUSED;
+ _page_cb_arr[i]._pbuff = _page_ptr_arr[i];
+ _page_cb_arr[i]._pdtokl = new std::deque<data_tok*>;
+ _page_cb_arr[i]._pdtokl->clear();
+ _aio_cb_arr[i].data = (void*)&_page_cb_arr[i];
+ }
+
+ // 6. Allocate io_event array, max one event per cache page plus one for each file
+ const uint16_t max_aio_evts = _cache_num_pages + 1; // One additional event for file header writes
+ _aio_event_arr = (aio_event*)std::malloc(max_aio_evts * sizeof(aio_event));
+ MALLOC_CHK(_aio_event_arr, "_aio_event_arr", "pmgr", "initialize");
+
+ // 7. Initialize AIO context
+ if (int ret = aio::queue_init(max_aio_evts, &_ioctx))
+ {
+ std::ostringstream oss;
+ oss << "io_queue_init() failed: " << FORMAT_SYSERR(-ret);
+ throw jexception(jerrno::JERR__AIO, oss.str(), "pmgr", "initialize");
+ }
+}
+
+void
+pmgr::clean()
+{
+ // Clean up allocated memory here
+
+ if (_ioctx)
+ aio::queue_release(_ioctx);
+
+ std::free(_page_base_ptr);
+ _page_base_ptr = 0;
+
+ if (_page_cb_arr)
+ {
+ for (int i=0; i<_cache_num_pages; i++)
+ delete _page_cb_arr[i]._pdtokl;
+ std::free(_page_ptr_arr);
+ _page_ptr_arr = 0;
+ }
+
+ std::free(_page_cb_arr);
+ _page_cb_arr = 0;
+
+ std::free(_aio_cb_arr);
+ _aio_cb_arr = 0;
+
+ std::free(_aio_event_arr);
+ _aio_event_arr = 0;
+}
+
+// TODO: almost identical to pmgr::page_cb::state_str() above - resolve
+const char*
+pmgr::page_state_str(page_state ps)
+{
+ switch (ps)
+ {
+ case UNUSED:
+ return "UNUSED";
+ case IN_USE:
+ return "IN_USE";
+ case AIO_PENDING:
+ return "AIO_PENDING";
+ }
+ return "<page_state unknown>";
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/pmgr.h b/qpid/cpp/src/qpid/linearstore/journal/pmgr.h
new file mode 100644
index 0000000000..e618397647
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/pmgr.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_PMGR_H
+#define QPID_LINEARSTORE_JOURNAL_PMGR_H
+
+#include <deque>
+#include "qpid/linearstore/journal/aio.h"
+#include "qpid/linearstore/journal/deq_rec.h"
+#include "qpid/linearstore/journal/enq_map.h"
+#include "qpid/linearstore/journal/enq_rec.h"
+#include "qpid/linearstore/journal/txn_map.h"
+#include "qpid/linearstore/journal/txn_rec.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class aio_callback;
+class data_tok;
+class jcntl;
+class JournalFile;
+
+/**
+* \brief Abstract class for managing either read or write page cache of arbitrary size and
+* number of cache_num_pages.
+*/
+class pmgr
+{
+public:
+ /**
+ * \brief Enumeration of possible stats of a page within a page cache.
+ */
+ enum page_state
+ {
+ UNUSED, ///< A page is uninitialized, contains no data.
+ IN_USE, ///< Page is in use.
+ AIO_PENDING ///< An AIO request outstanding.
+ };
+
+ /**
+ * \brief Page control block, carries control and state information for each page in the
+ * cache.
+ */
+ struct page_cb
+ {
+ uint16_t _index; ///< Index of this page
+ page_state _state; ///< Status of page
+ uint64_t _frid; ///< First rid in page (used for fhdr init)
+ uint32_t _wdblks; ///< Total number of dblks in page so far
+ std::deque<data_tok*>* _pdtokl; ///< Page message tokens list
+ JournalFile* _jfp; ///< Journal file for incrementing compl counts
+ void* _pbuff; ///< Page buffer
+
+ page_cb(uint16_t index); ///< Convenience constructor
+ const char* state_str() const; ///< Return state as string for this pcb
+ };
+
+protected:
+ static const uint32_t _sblkSizeBytes; ///< Disk softblock size
+ uint32_t _cache_pgsize_sblks; ///< Size of page cache cache_num_pages
+ uint16_t _cache_num_pages; ///< Number of page cache cache_num_pages
+ jcntl* _jc; ///< Pointer to journal controller
+ enq_map& _emap; ///< Ref to enqueue map
+ txn_map& _tmap; ///< Ref to transaction map
+ void* _page_base_ptr; ///< Base pointer to page memory
+ void** _page_ptr_arr; ///< Array of pointers to cache_num_pages in page memory
+ page_cb* _page_cb_arr; ///< Array of page_cb structs
+ aio_cb* _aio_cb_arr; ///< Array of iocb structs
+ aio_event* _aio_event_arr; ///< Array of io_events
+ io_context_t _ioctx; ///< AIO context for read/write operations
+ uint16_t _pg_index; ///< Index of current page being used
+ uint32_t _pg_cntr; ///< Page counter; determines if file rotation req'd
+ uint32_t _pg_offset_dblks; ///< Page offset (used so far) in data blocks
+ uint32_t _aio_evt_rem; ///< Remaining AIO events
+ aio_callback* _cbp; ///< Pointer to callback object
+
+ enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding
+ deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding
+ txn_rec _txn_rec; ///< Transaction record used for encoding/decoding
+
+public:
+ pmgr(jcntl* jc, enq_map& emap, txn_map& tmap);
+ virtual ~pmgr();
+
+ virtual int32_t get_events(timespec* const timeout, bool flush) = 0;
+ inline uint32_t get_aio_evt_rem() const { return _aio_evt_rem; }
+ static const char* page_state_str(page_state ps);
+ inline uint32_t cache_pgsize_sblks() const { return _cache_pgsize_sblks; }
+ inline uint16_t cache_num_pages() const { return _cache_num_pages; }
+
+protected:
+ virtual void initialize(aio_callback* const cbp, const uint32_t cache_pgsize_sblks,
+ const uint16_t cache_num_pages);
+ virtual void rotate_page() = 0;
+ virtual void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_PMGR_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/slock.h b/qpid/cpp/src/qpid/linearstore/journal/slock.h
new file mode 100644
index 0000000000..12e9e2d08c
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/slock.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_SLOCK_H
+#define QPID_LINEARSTORE_JOURNAL_SLOCK_H
+
+#include "qpid/linearstore/journal/smutex.h"
+#include <pthread.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// Ultra-simple scoped lock class, auto-releases mutex when it goes out-of-scope
+class slock
+{
+protected:
+ const smutex& _sm;
+public:
+ inline slock(const smutex& sm) : _sm(sm)
+ {
+ PTHREAD_CHK(::pthread_mutex_lock(_sm.get()), "::pthread_mutex_lock", "slock", "slock");
+ }
+ inline ~slock()
+ {
+ PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "slock", "~slock");
+ }
+};
+
+// Ultra-simple scoped try-lock class, auto-releases mutex when it goes out-of-scope
+class stlock
+{
+protected:
+ const smutex& _sm;
+ bool _locked;
+public:
+ inline stlock(const smutex& sm) : _sm(sm), _locked(false)
+ {
+ int ret = ::pthread_mutex_trylock(_sm.get());
+ _locked = (ret == 0); // check if lock obtained
+ if (!_locked && ret != EBUSY) PTHREAD_CHK(ret, "::pthread_mutex_trylock", "stlock", "stlock");
+ }
+ inline ~stlock()
+ {
+ if (_locked)
+ PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "stlock", "~stlock");
+ }
+ inline bool locked() const { return _locked; }
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_SLOCK_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/smutex.h b/qpid/cpp/src/qpid/linearstore/journal/smutex.h
new file mode 100644
index 0000000000..b43f55944c
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/smutex.h
@@ -0,0 +1,51 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_SMUTEX_H
+#define QPID_LINEARSTORE_JOURNAL_SMUTEX_H
+
+#include "qpid/linearstore/journal/jexception.h"
+#include <pthread.h>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ // Ultra-simple scoped mutex class that allows a posix mutex to be initialized and destroyed with error checks
+ class smutex
+ {
+ protected:
+ mutable pthread_mutex_t _m;
+ public:
+ inline smutex()
+ {
+ PTHREAD_CHK(::pthread_mutex_init(&_m, 0), "::pthread_mutex_init", "smutex", "smutex");
+ }
+ inline virtual ~smutex()
+ {
+ PTHREAD_CHK(::pthread_mutex_destroy(&_m), "::pthread_mutex_destroy", "smutex", "~smutex");
+ }
+ inline pthread_mutex_t* get() const { return &_m; }
+ };
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_SMUTEX_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp b/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp
new file mode 100644
index 0000000000..39f2cd1d88
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp
@@ -0,0 +1,41 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/time_ns.h"
+
+#include <sstream>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+const std::string
+time_ns::str(int precision) const
+{
+ const double t = tv_sec + (tv_nsec/1e9);
+ std::ostringstream oss;
+ oss.setf(std::ios::fixed, std::ios::floatfield);
+ oss.precision(precision);
+ oss << t;
+ return oss.str();
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/time_ns.h b/qpid/cpp/src/qpid/linearstore/journal/time_ns.h
new file mode 100644
index 0000000000..a228d47475
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/time_ns.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_TIME_NS_H
+#define QPID_LINEARSTORE_JOURNAL_TIME_NS_H
+
+#include <cerrno>
+#include <ctime>
+#include <string>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+struct time_ns : public timespec
+{
+ inline time_ns() { tv_sec = 0; tv_nsec = 0; }
+ inline time_ns(const std::time_t sec, const long nsec = 0) { tv_sec = sec; tv_nsec = nsec; }
+ inline time_ns(const time_ns& t) { tv_sec = t.tv_sec; tv_nsec = t.tv_nsec; }
+
+ inline void set_zero() { tv_sec = 0; tv_nsec = 0; }
+ inline bool is_zero() const { return tv_sec == 0 && tv_nsec == 0; }
+ inline int now() { if(::clock_gettime(CLOCK_REALTIME, this)) return errno; return 0; }
+ const std::string str(int precision = 6) const;
+
+ inline time_ns& operator=(const time_ns& rhs)
+ { tv_sec = rhs.tv_sec; tv_nsec = rhs.tv_nsec; return *this; }
+ inline time_ns& operator+=(const time_ns& rhs)
+ {
+ tv_nsec += rhs.tv_nsec;
+ if (tv_nsec >= 1000000000L) { tv_sec++; tv_nsec -= 1000000000L; }
+ tv_sec += rhs.tv_sec;
+ return *this;
+ }
+ inline time_ns& operator+=(const long ns)
+ {
+ tv_nsec += ns;
+ if (tv_nsec >= 1000000000L) { tv_sec++; tv_nsec -= 1000000000L; }
+ return *this;
+ }
+ inline time_ns& operator-=(const long ns)
+ {
+ tv_nsec -= ns;
+ if (tv_nsec < 0) { tv_sec--; tv_nsec += 1000000000L; }
+ return *this;
+ }
+ inline time_ns& operator-=(const time_ns& rhs)
+ {
+ tv_nsec -= rhs.tv_nsec;
+ if (tv_nsec < 0) { tv_sec--; tv_nsec += 1000000000L; }
+ tv_sec -= rhs.tv_sec;
+ return *this;
+ }
+ inline const time_ns operator+(const time_ns& rhs)
+ { time_ns t(*this); t += rhs; return t; }
+ inline const time_ns operator-(const time_ns& rhs)
+ { time_ns t(*this); t -= rhs; return t; }
+ inline bool operator==(const time_ns& rhs)
+ { return tv_sec == rhs.tv_sec && tv_nsec == rhs.tv_nsec; }
+ inline bool operator!=(const time_ns& rhs)
+ { return tv_sec != rhs.tv_sec || tv_nsec != rhs.tv_nsec; }
+ inline bool operator>(const time_ns& rhs)
+ { if(tv_sec == rhs.tv_sec) return tv_nsec > rhs.tv_nsec; return tv_sec > rhs.tv_sec; }
+ inline bool operator>=(const time_ns& rhs)
+ { if(tv_sec == rhs.tv_sec) return tv_nsec >= rhs.tv_nsec; return tv_sec >= rhs.tv_sec; }
+ inline bool operator<(const time_ns& rhs)
+ { if(tv_sec == rhs.tv_sec) return tv_nsec < rhs.tv_nsec; return tv_sec < rhs.tv_sec; }
+ inline bool operator<=(const time_ns& rhs)
+ { if(tv_sec == rhs.tv_sec) return tv_nsec <= rhs.tv_nsec; return tv_sec <= rhs.tv_sec; }
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_TIME_NS_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_map.cpp b/qpid/cpp/src/qpid/linearstore/journal/txn_map.cpp
new file mode 100644
index 0000000000..8336d36b80
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/txn_map.cpp
@@ -0,0 +1,263 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/txn_map.h"
+
+#include "qpid/linearstore/journal/slock.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// return/error codes
+int16_t txn_map::TMAP_RID_NOT_FOUND = -2;
+int16_t txn_map::TMAP_XID_NOT_FOUND = -1;
+int16_t txn_map::TMAP_OK = 0;
+int16_t txn_map::TMAP_NOT_SYNCED = 0;
+int16_t txn_map::TMAP_SYNCED = 1;
+
+txn_data_t::txn_data_t(const uint64_t rid,
+ const uint64_t drid,
+ const uint64_t fid,
+ const uint64_t foffs,
+ const bool enq_flag,
+ const bool tpc_flag,
+ const bool commit_flag):
+ rid_(rid),
+ drid_(drid),
+ fid_(fid),
+ foffs_(foffs),
+ enq_flag_(enq_flag),
+ tpc_flag_(tpc_flag),
+ commit_flag_(commit_flag),
+ aio_compl_(false)
+{}
+
+txn_op_stats_t::txn_op_stats_t(const txn_data_list_t& tdl) :
+ enqCnt(0U),
+ deqCnt(0U),
+ tpcCnt(0U),
+ abortCnt(0U),
+ commitCnt(0U),
+ rid(0ULL)
+{
+ for (tdl_const_itr_t i=tdl.begin(); i!=tdl.end(); ++i) {
+ if (i->enq_flag_) {
+ ++enqCnt;
+ rid = i->rid_;
+ } else {
+ ++deqCnt;
+ if (i->commit_flag_) {
+ ++commitCnt;
+ } else {
+ ++abortCnt;
+ }
+ }
+ if (i->tpc_flag_) {
+ ++tpcCnt;
+ }
+ }
+ if (tpcCnt > 0 && tpcCnt != tdl.size()) {
+ throw jexception("Inconsistent 2PC count"); // TODO: complete exception details
+ }
+ if (abortCnt > 0 && commitCnt > 0) {
+ throw jexception("Both abort and commit in same transaction"); // TODO: complete exception details
+ }
+}
+
+txn_map::txn_map():
+ _map()/*,
+ _pfid_txn_cnt()*/
+{}
+
+txn_map::~txn_map() {}
+
+bool
+txn_map::insert_txn_data(const std::string& xid, const txn_data_t& td)
+{
+ bool ok = true;
+ slock s(_mutex);
+ xmap_itr itr = _map.find(xid);
+ if (itr == _map.end()) // not found in map
+ {
+ txn_data_list_t list;
+ list.push_back(td);
+ std::pair<xmap_itr, bool> ret = _map.insert(xmap_param(xid, list));
+ if (!ret.second) // duplicate
+ ok = false;
+ }
+ else
+ itr->second.push_back(td);
+ return ok;
+}
+
+const txn_data_list_t
+txn_map::get_tdata_list(const std::string& xid)
+{
+ slock s(_mutex);
+ return get_tdata_list_nolock(xid);
+}
+
+const txn_data_list_t
+txn_map::get_tdata_list_nolock(const std::string& xid)
+{
+ xmap_itr itr = _map.find(xid);
+ if (itr == _map.end()) // not found in map
+ return _empty_data_list;
+ return itr->second;
+}
+
+const txn_data_list_t
+txn_map::get_remove_tdata_list(const std::string& xid)
+{
+ slock s(_mutex);
+ xmap_itr itr = _map.find(xid);
+ if (itr == _map.end()) // not found in map
+ return _empty_data_list;
+ txn_data_list_t list = itr->second;
+ _map.erase(itr);
+ return list;
+}
+
+bool
+txn_map::in_map(const std::string& xid)
+{
+ slock s(_mutex);
+ xmap_itr itr= _map.find(xid);
+ return itr != _map.end();
+}
+
+uint32_t
+txn_map::enq_cnt()
+{
+ return cnt(true);
+}
+
+uint32_t
+txn_map::deq_cnt()
+{
+ return cnt(true);
+}
+
+uint32_t
+txn_map::cnt(const bool enq_flag)
+{
+ slock s(_mutex);
+ uint32_t c = 0;
+ for (xmap_itr i = _map.begin(); i != _map.end(); i++)
+ {
+ for (tdl_itr_t j = i->second.begin(); j < i->second.end(); j++)
+ {
+ if (j->enq_flag_ == enq_flag)
+ c++;
+ }
+ }
+ return c;
+}
+
+int16_t
+txn_map::is_txn_synced(const std::string& xid)
+{
+ slock s(_mutex);
+ xmap_itr itr = _map.find(xid);
+ if (itr == _map.end()) // not found in map
+ return TMAP_XID_NOT_FOUND;
+ bool is_synced = true;
+ for (tdl_itr_t litr = itr->second.begin(); litr < itr->second.end(); litr++)
+ {
+ if (!litr->aio_compl_)
+ {
+ is_synced = false;
+ break;
+ }
+ }
+ return is_synced ? TMAP_SYNCED : TMAP_NOT_SYNCED;
+}
+
+int16_t
+txn_map::set_aio_compl(const std::string& xid, const uint64_t rid)
+{
+ slock s(_mutex);
+ xmap_itr itr = _map.find(xid);
+ if (itr == _map.end()) // xid not found in map
+ return TMAP_XID_NOT_FOUND;
+ for (tdl_itr_t litr = itr->second.begin(); litr < itr->second.end(); litr++)
+ {
+ if (litr->rid_ == rid)
+ {
+ litr->aio_compl_ = true;
+ return TMAP_OK; // rid found
+ }
+ }
+ // xid present, but rid not found
+ return TMAP_RID_NOT_FOUND;
+}
+
+bool
+txn_map::data_exists(const std::string& xid, const uint64_t rid)
+{
+ bool found = false;
+ {
+ slock s(_mutex);
+ txn_data_list_t tdl = get_tdata_list_nolock(xid);
+ tdl_itr_t itr = tdl.begin();
+ while (itr != tdl.end() && !found)
+ {
+ found = itr->rid_ == rid;
+ itr++;
+ }
+ }
+ return found;
+}
+
+bool
+txn_map::is_enq(const uint64_t rid)
+{
+ bool found = false;
+ {
+ slock s(_mutex);
+ for (xmap_itr i = _map.begin(); i != _map.end() && !found; i++)
+ {
+ txn_data_list_t list = i->second;
+ for (tdl_itr_t j = list.begin(); j < list.end() && !found; j++)
+ {
+ if (j->enq_flag_)
+ found = j->rid_ == rid;
+ else
+ found = j->drid_ == rid;
+ }
+ }
+ }
+ return found;
+}
+
+void
+txn_map::xid_list(std::vector<std::string>& xv)
+{
+ xv.clear();
+ {
+ slock s(_mutex);
+ for (xmap_itr itr = _map.begin(); itr != _map.end(); itr++)
+ xv.push_back(itr->first);
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_map.h b/qpid/cpp/src/qpid/linearstore/journal/txn_map.h
new file mode 100644
index 0000000000..e79c0522d8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/txn_map.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_TXN_MAP_H
+#define QPID_LINEARSTORE_JOURNAL_TXN_MAP_H
+
+#include "qpid/linearstore/journal/smutex.h"
+#include <map>
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+ /**
+ * \struct txn_data_struct
+ * \brief Struct encapsulating transaction data necessary for processing a transaction
+ * in the journal once it is closed with either a commit or abort.
+ */
+ typedef struct txn_data_t
+ {
+ uint64_t rid_; ///< Record id for this operation
+ uint64_t drid_; ///< Dequeue record id for this operation
+ uint64_t fid_; ///< File seq number, to be used when transferring to emap on commit
+ uint64_t foffs_; ///< Offset in file for this record
+ bool enq_flag_; ///< If true, enq op, otherwise deq op
+ bool tpc_flag_; ///< 2PC transaction if true
+ bool commit_flag_; ///< TPL only: (2PC transactions) Records 2PC complete c/a mode
+ bool aio_compl_; ///< Initially false, set to true when record AIO returns
+ txn_data_t(const uint64_t rid,
+ const uint64_t drid,
+ const uint64_t fid,
+ const uint64_t foffs,
+ const bool enq_flag,
+ const bool tpc_flag,
+ const bool commit_flag);
+ } txn_data_t;
+ typedef std::vector<txn_data_t> txn_data_list_t;
+ typedef txn_data_list_t::iterator tdl_itr_t;
+ typedef txn_data_list_t::const_iterator tdl_const_itr_t;
+
+ typedef struct txn_op_stats_t
+ {
+ uint16_t enqCnt;
+ uint16_t deqCnt;
+ uint16_t tpcCnt;
+ uint16_t abortCnt;
+ uint16_t commitCnt;
+ uint64_t rid;
+ txn_op_stats_t(const txn_data_list_t& tdl);
+ } txn_op_stats_t;
+
+ /**
+ * \class txn_map
+ * \brief Class for storing transaction data for each open (ie not committed or aborted)
+ * xid in the store. If aborted, records are discarded; if committed, they are
+ * transferred to the enqueue map.
+ *
+ * The data is encapsulated by struct txn_data_struct. A vector containing the information
+ * for each operation included as part of the same transaction is mapped against the
+ * xid.
+ *
+ * The aio_compl flag is set true as each AIO write operation for the enqueue or dequeue
+ * returns. Checking that all of these flags are true for a given xid is the mechanism
+ * used to determine if the transaction is syncronized (through method is_txn_synced()).
+ *
+ * On transaction commit, then each operation is handled as follows:
+ *
+ * If an enqueue (_enq_flag is true), then the rid and pfid are transferred to the enq_map.
+ * If a dequeue (_enq_flag is false), then the rid stored in the drid field is used to
+ * remove the corresponding record from the enq_map.
+ *
+ * On transaction abort, then each operation is handled as follows:
+ *
+ * If an enqueue (_enq_flag is true), then the data is simply discarded.
+ * If a dequeue (_enq_flag is false), then the lock for the corresponding enqueue in enq_map
+ * (if not a part of the same transaction) is removed, and the data discarded.
+ *
+ * <pre>
+ * key data
+ *
+ * xid1 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] >
+ * xid2 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] >
+ * xid3 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] >
+ * ...
+ * </pre>
+ */
+ class txn_map
+ {
+ public:
+ // return/error codes
+ static int16_t TMAP_RID_NOT_FOUND;
+ static int16_t TMAP_XID_NOT_FOUND;
+ static int16_t TMAP_OK;
+ static int16_t TMAP_NOT_SYNCED;
+ static int16_t TMAP_SYNCED;
+
+ private:
+ typedef std::pair<std::string, txn_data_list_t> xmap_param;
+ typedef std::map<std::string, txn_data_list_t> xmap;
+ typedef xmap::iterator xmap_itr;
+
+ xmap _map;
+ smutex _mutex;
+ const txn_data_list_t _empty_data_list;
+
+ public:
+ txn_map();
+ virtual ~txn_map();
+
+ bool insert_txn_data(const std::string& xid, const txn_data_t& td);
+ const txn_data_list_t get_tdata_list(const std::string& xid);
+ const txn_data_list_t get_remove_tdata_list(const std::string& xid);
+ bool in_map(const std::string& xid);
+ uint32_t enq_cnt();
+ uint32_t deq_cnt();
+ int16_t is_txn_synced(const std::string& xid); // -1=xid not found; 0=not synced; 1=synced
+ int16_t set_aio_compl(const std::string& xid, const uint64_t rid); // -2=rid not found; -1=xid not found; 0=done
+ bool data_exists(const std::string& xid, const uint64_t rid);
+ bool is_enq(const uint64_t rid);
+ inline void clear() { _map.clear(); }
+ inline bool empty() const { return _map.empty(); }
+ inline size_t size() const { return _map.size(); }
+ void xid_list(std::vector<std::string>& xv);
+ private:
+ uint32_t cnt(const bool enq_flag);
+ const txn_data_list_t get_tdata_list_nolock(const std::string& xid);
+ };
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_TXN_MAP_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp
new file mode 100644
index 0000000000..298ab608b1
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp
@@ -0,0 +1,305 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/txn_rec.h"
+
+#include <cassert>
+#include <cstring>
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/jexception.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+txn_rec::txn_rec():
+ _xidp(0),
+ _xid_buff(0)
+{
+ ::txn_hdr_init(&_txn_hdr, 0, QLS_JRNL_VERSION, 0, 0, 0, 0);
+ ::rec_tail_init(&_txn_tail, 0, 0, 0, 0);
+}
+
+txn_rec::~txn_rec()
+{
+ clean();
+}
+
+void
+txn_rec::reset(const bool commitFlag, const uint64_t serial, const uint64_t rid, const void* const xidp,
+ const std::size_t xidlen)
+{
+ _txn_hdr._rhdr._magic = commitFlag ? QLS_TXC_MAGIC : QLS_TXA_MAGIC;
+ _txn_hdr._rhdr._serial = serial;
+ _txn_hdr._rhdr._rid = rid;
+ _txn_hdr._xidsize = xidlen;
+ _xidp = xidp;
+ _xid_buff = 0;
+ _txn_tail._xmagic = ~_txn_hdr._rhdr._magic;
+ _txn_tail._serial = serial;
+ _txn_tail._rid = rid;
+ _txn_tail._checksum = 0UL;
+}
+
+uint32_t
+txn_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum)
+{
+ assert(wptr != 0);
+ assert(max_size_dblks > 0);
+ assert(_xidp != 0 && _txn_hdr._xidsize > 0);
+
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t wr_cnt = 0;
+ if (rec_offs_dblks) // Continuation of split dequeue record (over 2 or more pages)
+ {
+ if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required
+ {
+ rec_offs -= sizeof(txn_hdr_t);
+ std::size_t wsize = _txn_hdr._xidsize > rec_offs ? _txn_hdr._xidsize - rec_offs : 0;
+ std::size_t wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= _txn_hdr._xidsize - wsize2;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _txn_tail._checksum = checksum.getChecksum();
+ wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0;
+ wsize2 = wsize;
+ if (wsize)
+ {
+ if (wsize > rem)
+ wsize = rem;
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ rec_offs -= sizeof(_txn_tail) - wsize2;
+ }
+ assert(rem == 0);
+ assert(rec_offs == 0);
+ }
+ else // No further split required
+ {
+ rec_offs -= sizeof(txn_hdr_t);
+ std::size_t wsize = _txn_hdr._xidsize > rec_offs ? _txn_hdr._xidsize - rec_offs : 0;
+ if (wsize)
+ {
+ std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+ wr_cnt += wsize;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ }
+ rec_offs -= _txn_hdr._xidsize - wsize;
+ _txn_tail._checksum = checksum.getChecksum();
+ wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0;
+ if (wsize)
+ {
+ std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize);
+ wr_cnt += wsize;
+#ifdef QLS_CLEAN
+ std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+ std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ rec_offs -= sizeof(_txn_tail) - wsize;
+ assert(rec_offs == 0);
+ }
+ }
+ else // Start at beginning of data record
+ {
+ // Assumption: the header will always fit into the first dblk
+ std::memcpy(wptr, (void*)&_txn_hdr, sizeof(txn_hdr_t));
+ wr_cnt = sizeof(txn_hdr_t);
+ if (size_dblks(rec_size()) > max_size_dblks) // Split required
+ {
+ std::size_t wsize;
+ rem -= sizeof(txn_hdr_t);
+ if (rem)
+ {
+ wsize = rem >= _txn_hdr._xidsize ? _txn_hdr._xidsize : rem;
+ std::memcpy((char*)wptr + wr_cnt, _xidp, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ if (rem)
+ {
+ _txn_tail._checksum = checksum.getChecksum();
+ wsize = rem >= sizeof(_txn_tail) ? sizeof(_txn_tail) : rem;
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, wsize);
+ wr_cnt += wsize;
+ rem -= wsize;
+ }
+ assert(rem == 0);
+ }
+ else // No split required
+ {
+ std::memcpy((char*)wptr + wr_cnt, _xidp, _txn_hdr._xidsize);
+ wr_cnt += _txn_hdr._xidsize;
+ checksum.addData((unsigned char*)wptr, wr_cnt);
+ _txn_tail._checksum = checksum.getChecksum();
+ std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, sizeof(_txn_tail));
+ wr_cnt += sizeof(_txn_tail);
+#ifdef QLS_CLEAN
+ std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES;
+ std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+ }
+ }
+ return size_dblks(wr_cnt);
+}
+
+bool
+txn_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start)
+{
+ if (rec_offs == 0)
+ {
+ // Read header, allocate for xid
+ ::rec_hdr_copy(&_txn_hdr._rhdr, &h);
+ ifsp->read((char*)&_txn_hdr._xidsize, sizeof(_txn_hdr._xidsize));
+ rec_offs = sizeof(::txn_hdr_t);
+ _xid_buff = std::malloc(_txn_hdr._xidsize);
+ MALLOC_CHK(_xid_buff, "_buff", "txn_rec", "rcv_decode");
+ }
+ if (rec_offs < sizeof(txn_hdr_t) + _txn_hdr._xidsize)
+ {
+ // Read xid (or continue reading xid)
+ std::size_t offs = rec_offs - sizeof(txn_hdr_t);
+ ifsp->read((char*)_xid_buff + offs, _txn_hdr._xidsize - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < _txn_hdr._xidsize - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ }
+ if (rec_offs < sizeof(txn_hdr_t) + _txn_hdr._xidsize + sizeof(rec_tail_t))
+ {
+ // Read tail (or continue reading tail)
+ std::size_t offs = rec_offs - sizeof(txn_hdr_t) - _txn_hdr._xidsize;
+ ifsp->read((char*)&_txn_tail + offs, sizeof(rec_tail_t) - offs);
+ std::size_t size_read = ifsp->gcount();
+ rec_offs += size_read;
+ if (size_read < sizeof(rec_tail_t) - offs)
+ {
+ assert(ifsp->eof());
+ // As we may have read past eof, turn off fail bit
+ ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+ assert(!ifsp->fail() && !ifsp->bad());
+ return false;
+ }
+ check_rec_tail(rec_start);
+ }
+ ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size());
+ assert(!ifsp->fail() && !ifsp->bad());
+ assert(_txn_hdr._xidsize > 0);
+ return true;
+}
+
+std::size_t
+txn_rec::get_xid(void** const xidpp)
+{
+ if (!_xid_buff)
+ {
+ *xidpp = 0;
+ return 0;
+ }
+ *xidpp = _xid_buff;
+ return _txn_hdr._xidsize;
+}
+
+std::string&
+txn_rec::str(std::string& str) const
+{
+ std::ostringstream oss;
+ if (_txn_hdr._rhdr._magic == QLS_TXA_MAGIC)
+ oss << "dtxa_rec: m=" << _txn_hdr._rhdr._magic;
+ else
+ oss << "dtxc_rec: m=" << _txn_hdr._rhdr._magic;
+ oss << " v=" << (int)_txn_hdr._rhdr._version;
+ oss << " rid=" << _txn_hdr._rhdr._rid;
+ oss << " xid=\"" << _xidp << "\"";
+ str.append(oss.str());
+ return str;
+}
+
+std::size_t
+txn_rec::xid_size() const
+{
+ return _txn_hdr._xidsize;
+}
+
+std::size_t
+txn_rec::rec_size() const
+{
+ return sizeof(txn_hdr_t) + _txn_hdr._xidsize + sizeof(rec_tail_t);
+}
+
+void
+txn_rec::check_rec_tail(const std::streampos rec_start) const {
+ Checksum checksum;
+ checksum.addData((const unsigned char*)&_txn_hdr, sizeof(::txn_hdr_t));
+ if (_txn_hdr._xidsize > 0) {
+ checksum.addData((const unsigned char*)_xid_buff, _txn_hdr._xidsize);
+ }
+ uint32_t cs = checksum.getChecksum();
+ uint16_t res = ::rec_tail_check(&_txn_tail, &_txn_hdr._rhdr, cs);
+ if (res != 0) {
+ std::stringstream oss;
+ oss << std::endl << " Record offset: 0x" << std::hex << rec_start;
+ if (res & ::REC_TAIL_MAGIC_ERR_MASK) {
+ oss << std::endl << " Magic: expected 0x" << ~_txn_hdr._rhdr._magic << "; found 0x" << _txn_tail._xmagic;
+ }
+ if (res & ::REC_TAIL_SERIAL_ERR_MASK) {
+ oss << std::endl << " Serial: expected 0x" << _txn_hdr._rhdr._serial << "; found 0x" << _txn_tail._serial;
+ }
+ if (res & ::REC_TAIL_RID_ERR_MASK) {
+ oss << std::endl << " Record Id: expected 0x" << _txn_hdr._rhdr._rid << "; found 0x" << _txn_tail._rid;
+ }
+ if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) {
+ oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _txn_tail._checksum;
+ }
+ throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "txn_rec", "check_rec_tail");
+ }
+}
+
+void
+txn_rec::clean()
+{
+ if (_xid_buff) {
+ std::free(_xid_buff);
+ _xid_buff = 0;
+ }
+}
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h
new file mode 100644
index 0000000000..4552071595
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_TXN_REC_H
+#define QPID_LINEARSTORE_JOURNAL_TXN_REC_H
+
+#include "qpid/linearstore/journal/jrec.h"
+#include "qpid/linearstore/journal/utils/txn_hdr.h"
+#include "qpid/linearstore/journal/utils/rec_tail.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+/**
+* \class txn_rec
+* \brief Class to handle a single journal commit or abort record.
+*/
+class txn_rec : public jrec
+{
+private:
+ ::txn_hdr_t _txn_hdr; ///< Local instance of transaction header struct
+ const void* _xidp; ///< xid pointer for encoding (writing to disk)
+ void* _xid_buff; ///< Pointer to buffer to receive xid read from disk
+ ::rec_tail_t _txn_tail; ///< Local instance of enqueue tail struct
+
+public:
+ txn_rec();
+ virtual ~txn_rec();
+
+ void reset(const bool commitFlag, const uint64_t serial, const uint64_t rid, const void* const xidp,
+ const std::size_t xidlen);
+ uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum);
+ bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start);
+
+ std::size_t get_xid(void** const xidpp);
+ std::string& str(std::string& str) const;
+ inline std::size_t data_size() const { return 0; } // This record never carries data
+ std::size_t xid_size() const;
+ std::size_t rec_size() const;
+ inline uint64_t rid() const { return _txn_hdr._rhdr._rid; }
+ void check_rec_tail(const std::streampos rec_start) const;
+
+private:
+ virtual void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_TXN_REC_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c
new file mode 100644
index 0000000000..b55c1c16c8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "deq_hdr.h"
+
+/*static const uint16_t DEQ_HDR_TXNCMPLCOMMIT_MASK = 0x10;*/
+
+void deq_hdr_init(deq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t deq_rid, const uint64_t xidsize) {
+ rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+ dest->_deq_rid = deq_rid;
+ dest->_xidsize = xidsize;
+}
+
+void deq_hdr_copy(deq_hdr_t* dest, const deq_hdr_t* src) {
+ rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+ dest->_deq_rid = src->_deq_rid;
+ dest->_xidsize = src->_xidsize;
+}
+
+bool is_txn_coml_commit(const deq_hdr_t *dh) {
+ return dh->_rhdr._uflag & DEQ_HDR_TXNCMPLCOMMIT_MASK;
+}
+
+void set_txn_coml_commit(deq_hdr_t *dh, const bool commit) {
+ dh->_rhdr._uflag = commit ? dh->_rhdr._uflag | DEQ_HDR_TXNCMPLCOMMIT_MASK : // set flag bit
+ dh->_rhdr._uflag & (~DEQ_HDR_TXNCMPLCOMMIT_MASK); // unset flag bit
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h
new file mode 100644
index 0000000000..3392867153
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h
@@ -0,0 +1,83 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdbool.h>
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for dequeue record.
+ *
+ * Struct for dequeue record. If this record has a non-zero xidsize field (i.e., there is a
+ * valid XID), then this header is followed by the XID of xidsize bytes and a rec_tail. If,
+ * on the other hand, this record has a zero xidsize (i.e., there is no XID), then the rec_tail
+ * is absent.
+ *
+ * Note that this record had its own rid distinct from the rid of the record it is dequeueing.
+ * The rid field below is the rid of the dequeue record itself; the deq-rid field is the rid of a
+ * previous enqueue record being dequeued by this record.
+ *
+ * Record header info in binary format (40 bytes):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+ -+
+ * | magic | ver | flags | |
+ * +---+---+---+---+---+---+---+---+ |
+ * | serial | | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+ |
+ * | rid | |
+ * +---+---+---+---+---+---+---+---+ -+
+ * | deq-rid |
+ * +---+---+---+---+---+---+---+---+
+ * | xidsize |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * deq-rid = dequeue record ID
+ * </pre>
+ */
+typedef struct deq_hdr_t {
+ rec_hdr_t _rhdr; /**< Common record header struct */
+ uint64_t _deq_rid; /**< Record ID of record being dequeued */
+ uint64_t _xidsize; /**< XID size */
+} deq_hdr_t;
+
+static const uint16_t DEQ_HDR_TXNCMPLCOMMIT_MASK = 0x10;
+
+void deq_hdr_init(deq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t deq_rid, const uint64_t xidsize);
+void deq_hdr_copy(deq_hdr_t* dest, const deq_hdr_t* src);
+bool is_txn_coml_commit(const deq_hdr_t *dh);
+void set_txn_coml_commit(deq_hdr_t *dh, const bool commit);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c
new file mode 100644
index 0000000000..b4e8b62ff1
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "enq_hdr.h"
+
+//static const uint16_t ENQ_HDR_TRANSIENT_MASK = 0x10;
+//static const uint16_t ENQ_HDR_EXTERNAL_MASK = 0x20;
+
+void enq_hdr_init(enq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t xidsize, const uint64_t dsize) {
+ rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+ dest->_xidsize = xidsize;
+ dest->_dsize = dsize;
+}
+
+void enq_hdr_copy(enq_hdr_t* dest, const enq_hdr_t* src) {
+ rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+ dest->_xidsize = src->_xidsize;
+ dest->_dsize = src->_dsize;
+}
+
+bool is_enq_transient(const enq_hdr_t *eh) {
+ return eh->_rhdr._uflag & ENQ_HDR_TRANSIENT_MASK;
+}
+
+void set_enq_transient(enq_hdr_t *eh, const bool transient) {
+ eh->_rhdr._uflag = transient ? eh->_rhdr._uflag | ENQ_HDR_TRANSIENT_MASK :
+ eh->_rhdr._uflag & (~ENQ_HDR_TRANSIENT_MASK);
+}
+
+bool is_enq_external(const enq_hdr_t *eh) {
+ return eh->_rhdr._uflag & ENQ_HDR_EXTERNAL_MASK;
+}
+
+void set_enq_external(enq_hdr_t *eh, const bool external) {
+ eh->_rhdr._uflag = external ? eh->_rhdr._uflag | ENQ_HDR_EXTERNAL_MASK :
+ eh->_rhdr._uflag & (~ENQ_HDR_EXTERNAL_MASK);
+}
+
+bool validate_enq_hdr(enq_hdr_t *eh, const uint32_t magic, const uint16_t version, const uint64_t rid) {
+ return eh->_rhdr._magic == magic &&
+ eh->_rhdr._version == version &&
+ rid > 0 ? eh->_rhdr._rid == rid /* If rid == 0, don't compare rids */
+ : true;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h
new file mode 100644
index 0000000000..00108792bc
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h
@@ -0,0 +1,83 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdbool.h>
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for enqueue record.
+ *
+ * Struct for enqueue record. In addition to the common data, this header includes both the
+ * xid and data blob sizes.
+ *
+ * This header precedes all enqueue data in journal files.
+ *
+ * Record header info in binary format (40 bytes):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+ -+
+ * | magic | ver | flags | |
+ * +---+---+---+---+---+---+---+---+ |
+ * | serial | | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+ |
+ * | rid | |
+ * +---+---+---+---+---+---+---+---+ -+
+ * | xidsize |
+ * +---+---+---+---+---+---+---+---+
+ * | dsize |
+ * +---+---+---+---+---+---+---+---+
+ * v = file version (If the format or encoding of this file changes, then this
+ * number should be incremented)
+ * </pre>
+ */
+typedef struct enq_hdr_t {
+ rec_hdr_t _rhdr; /**< Common record header struct */
+ uint64_t _xidsize; /**< XID size in octets */
+ uint64_t _dsize; /**< Record data size in octets */
+} enq_hdr_t;
+
+static const uint16_t ENQ_HDR_TRANSIENT_MASK = 0x10;
+static const uint16_t ENQ_HDR_EXTERNAL_MASK = 0x20;
+
+void enq_hdr_init(enq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t xidsize, const uint64_t dsize);
+void enq_hdr_copy(enq_hdr_t* dest, const enq_hdr_t* src);
+bool is_enq_transient(const enq_hdr_t *eh);
+void set_enq_transient(enq_hdr_t *eh, const bool transient);
+bool is_enq_external(const enq_hdr_t *eh);
+void set_enq_external(enq_hdr_t *eh, const bool external);
+bool validate_enq_hdr(enq_hdr_t *eh, const uint32_t magic, const uint16_t version, const uint64_t rid);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c
new file mode 100644
index 0000000000..4e6cf1b8fa
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c
@@ -0,0 +1,115 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "file_hdr.h"
+#include <string.h>
+
+void file_hdr_create(file_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t fhdr_size_sblks,
+ const uint16_t efp_partition, const uint64_t file_size) {
+ rec_hdr_init(&dest->_rhdr, magic, version, 0, 0, 0);
+ dest->_fhdr_size_sblks = fhdr_size_sblks;
+ dest->_efp_partition = efp_partition;
+ dest->_reserved = 0;
+ dest->_data_size_kib = file_size;
+ dest->_fro = 0;
+ dest->_ts_nsec = 0;
+ dest->_ts_sec = 0;
+ dest->_file_number = 0;
+ dest->_queue_name_len = 0;
+}
+
+int file_hdr_init(void* dest, const uint64_t dest_len, const uint16_t uflag, const uint64_t serial, const uint64_t rid,
+ const uint64_t fro, const uint64_t file_number, const uint16_t queue_name_len, const char* queue_name) {
+ file_hdr_t* fhp = (file_hdr_t*)dest;
+ fhp->_rhdr._uflag = uflag;
+ fhp->_rhdr._serial = serial;
+ fhp->_rhdr._rid = rid;
+ fhp->_fro = fro;
+ fhp->_file_number = file_number;
+ if (sizeof(file_hdr_t) + queue_name_len < MAX_FILE_HDR_LEN) {
+ fhp->_queue_name_len = queue_name_len;
+ } else {
+ fhp->_queue_name_len = MAX_FILE_HDR_LEN - sizeof(file_hdr_t);
+ }
+ fhp->_queue_name_len = queue_name_len;
+ memcpy((char*)dest + sizeof(file_hdr_t), queue_name, queue_name_len);
+ memset((char*)dest + sizeof(file_hdr_t) + queue_name_len, 0, dest_len - sizeof(file_hdr_t) - queue_name_len);
+ return set_time_now(dest);
+}
+
+int file_hdr_check(file_hdr_t* hdr, const uint32_t magic, const uint16_t version, const uint64_t data_size_kib, const uint16_t max_queue_name_len) {
+ int err = rec_hdr_check_base(&hdr->_rhdr, magic, version);
+ if (data_size_kib && hdr->_data_size_kib != data_size_kib) err |= 0x1000;
+ if (hdr->_queue_name_len > max_queue_name_len) err |= 0x10000;
+ return err;
+}
+
+void file_hdr_copy(file_hdr_t* dest, const file_hdr_t* src) {
+ rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+ dest->_fhdr_size_sblks = src->_fhdr_size_sblks; // Should this be copied?
+ dest->_efp_partition = src->_efp_partition; // Should this be copied?
+ dest->_data_size_kib = src->_data_size_kib;
+ dest->_fro = src->_fro;
+ dest->_ts_sec = src->_ts_sec;
+ dest->_ts_nsec = src->_ts_nsec;
+ dest->_file_number = src->_file_number;
+}
+
+void file_hdr_reset(file_hdr_t* target) {
+ target->_rhdr._uflag = 0;
+ target->_rhdr._serial = 0;
+ target->_rhdr._rid = 0;
+ target->_fro = 0;
+ target->_ts_sec = 0;
+ target->_ts_nsec = 0;
+ target->_file_number = 0;
+ target->_queue_name_len = 0;
+}
+
+int is_file_hdr_reset(file_hdr_t* target) {
+ return target->_rhdr._uflag == 0 &&
+ target->_rhdr._serial == 0 &&
+ target->_rhdr._rid == 0 &&
+ target->_ts_sec == 0 &&
+ target->_ts_nsec == 0 &&
+ target->_file_number == 0 &&
+ target->_queue_name_len == 0;
+}
+
+int set_time_now(file_hdr_t *fh)
+{
+ struct timespec ts;
+ int err = clock_gettime(CLOCK_REALTIME, &ts);
+ if (err)
+ return err;
+ fh->_ts_sec = ts.tv_sec;
+ fh->_ts_nsec = ts.tv_nsec;
+ return 0;
+}
+
+
+void set_time(file_hdr_t *fh, struct timespec *ts)
+{
+ fh->_ts_sec = ts->tv_sec;
+ fh->_ts_nsec = ts->tv_nsec;
+}
+
+
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h
new file mode 100644
index 0000000000..5987e1871e
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h
@@ -0,0 +1,111 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <time.h>
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#define MAX_FILE_HDR_LEN 4096 // Set to 1 sblk
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for data common to the head of all journal files. In addition to
+ * the common data, this includes the record ID and offset of the first record in
+ * the file.
+ *
+ * This header precedes all data in journal files and occupies the first complete
+ * block in the file. The record ID and offset are updated on each overwrite of the
+ * file.
+ *
+ * File header info in binary format (74 bytes + size of file name in octets):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+ -+
+ * | magic | ver | flags | |
+ * +---+---+---+---+---+---+---+---+ |
+ * | serial | | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+ |
+ * | rid | |
+ * +---+---+---+---+---+---+---+---+ -+
+ * | fhs | partn | reserved |
+ * +---+---+---+---+---+---+---+---+
+ * | data-size |
+ * +---+---+---+---+---+---+---+---+
+ * | fro |
+ * +---+---+---+---+---+---+---+---+
+ * | timestamp (sec) |
+ * +---+---+---+---+---+---+---+---+
+ * | timestamp (ns) |
+ * +---+---+---+---+---+---+---+---+
+ * | file-number |
+ * +---+---+---+---+---+---+---+---+
+ * | qnl | Queue Name... |
+ * +-------+ |
+ * | |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ver = Journal version
+ * rid = Record ID
+ * fhs = File header size in sblks (defined by JRNL_SBLK_SIZE)
+ * partn = EFP partition from which this file came
+ * fro = First Record Offset
+ * qnl = Length of the queue name in octets.
+ * </pre>
+ */
+typedef struct file_hdr_t {
+ rec_hdr_t _rhdr; /**< Common record header struct, but rid field is used for rid of first compete record in file */
+ uint16_t _fhdr_size_sblks; /**< File header size in sblks (defined by JRNL_SBLK_SIZE) */
+ uint16_t _efp_partition; /**< EFP Partition number from which this file was obtained */
+ uint32_t _reserved;
+ uint64_t _data_size_kib; /**< Size of the data part of this file in KiB. (ie file size excluding file header sblk) */
+ uint64_t _fro; /**< First Record Offset (FRO) */
+ uint64_t _ts_sec; /**< Time stamp (seconds part) */
+ uint64_t _ts_nsec; /**< Time stamp (nanoseconds part) */
+ uint64_t _file_number; /**< The logical number of this file in a monotonically increasing sequence */
+ uint16_t _queue_name_len; /**< Length of the queue name in octets, which follows this struct in the header */
+} file_hdr_t;
+
+void file_hdr_create(file_hdr_t* dest, const uint32_t magic, const uint16_t version,
+ const uint16_t fhdr_size_sblks, const uint16_t efp_partition, const uint64_t file_size);
+int file_hdr_init(void* dest, const uint64_t dest_len, const uint16_t uflag, const uint64_t serial, const uint64_t rid,
+ const uint64_t fro, const uint64_t file_number, const uint16_t queue_name_len,
+ const char* queue_name);
+int file_hdr_check(file_hdr_t* hdr, const uint32_t magic, const uint16_t version, const uint64_t data_size_kib,
+ const uint16_t max_queue_name_len);
+void file_hdr_reset(file_hdr_t* target);
+int is_file_hdr_reset(file_hdr_t* target);
+void file_hdr_copy(file_hdr_t* dest, const file_hdr_t* src);
+int set_time_now(file_hdr_t *fh);
+void set_time(file_hdr_t *fh, struct timespec *ts);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c
new file mode 100644
index 0000000000..32eda8de5a
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "rec_hdr.h"
+
+void rec_hdr_init(rec_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag, const uint64_t serial, const uint64_t rid) {
+ dest->_magic = magic;
+ dest->_version = version;
+ dest->_uflag = uflag;
+ dest->_serial = serial;
+ dest->_rid = rid;
+}
+
+void rec_hdr_copy(rec_hdr_t* dest, const rec_hdr_t* src) {
+ dest->_magic = src->_magic;
+ dest->_version = src->_version;
+ dest->_uflag = src->_uflag;
+ dest->_serial = src->_serial;
+ dest->_rid = src->_rid;
+}
+
+int rec_hdr_check_base(rec_hdr_t* header, const uint32_t magic, const uint16_t version) {
+ int err = 0;
+ if (header->_magic != magic) err |= 0x1;
+ if (header->_version != version) err |= 0x10;
+ return err;
+}
+
+int rec_hdr_check(rec_hdr_t* header, const uint32_t magic, const uint16_t version, const uint64_t serial) {
+ int err = rec_hdr_check_base(header, magic, version);
+ if (header->_serial != serial) err |= 0x100;
+ return err;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h
new file mode 100644
index 0000000000..64349b5ab8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h
@@ -0,0 +1,72 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for data common to the head of all journal files and records.
+ * This includes identification for the file type, the encoding version, endian
+ * indicator and a record ID.
+ *
+ * File header info in binary format (24 bytes):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+
+ * | magic | ver | uflag |
+ * +---+---+---+---+---+---+---+---+
+ * | serial |
+ * +---+---+---+---+---+---+---+---+
+ * | rid |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ver = file version (If the format or encoding of this file changes, then this
+ * number should be incremented)
+ * rid = Record ID
+ * </pre>
+ */
+typedef struct rec_hdr_t {
+ uint32_t _magic; /**< File type identifier (magic number) */
+ uint16_t _version; /**< File encoding version */
+ uint16_t _uflag; /**< User-defined flags */
+ uint64_t _serial; /**< Serial number for this journal file */
+ uint64_t _rid; /**< Record ID (rotating 64-bit counter) */
+} rec_hdr_t;
+
+void rec_hdr_init(rec_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag, const uint64_t serial, const uint64_t rid);
+void rec_hdr_copy(rec_hdr_t* dest, const rec_hdr_t* src);
+int rec_hdr_check_base(rec_hdr_t* header, const uint32_t magic, const uint16_t version);
+int rec_hdr_check(rec_hdr_t* header, const uint32_t magic, const uint16_t version, const uint64_t serial);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c
new file mode 100644
index 0000000000..7128c96f32
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "rec_tail.h"
+
+void rec_tail_init(rec_tail_t* dest, const uint32_t xmagic, const uint32_t checksum, const uint64_t serial,
+ const uint64_t rid) {
+ dest->_xmagic = xmagic;
+ dest->_checksum = checksum;
+ dest->_serial = serial;
+ dest->_rid = rid;
+}
+
+void rec_tail_copy(rec_tail_t* dest, const rec_hdr_t* src, const uint32_t checksum) {
+ dest->_xmagic = ~(src->_magic);
+ dest->_checksum = checksum;
+ dest->_serial = src->_serial;
+ dest->_rid = src->_rid;
+}
+
+uint16_t rec_tail_check(const rec_tail_t* tail, const rec_hdr_t* header, const uint32_t checksum) {
+ uint16_t err = 0;
+ if (tail->_xmagic != ~header->_magic) err |= REC_TAIL_MAGIC_ERR_MASK;
+ if (tail->_serial != header->_serial) err |= REC_TAIL_SERIAL_ERR_MASK;
+ if (tail->_rid != header->_rid) err |= REC_TAIL_RID_ERR_MASK;
+ if (tail->_checksum != checksum) err |= REC_TAIL_CHECKSUM_ERR_MASK;
+ return err;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h
new file mode 100644
index 0000000000..afc71c104a
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h
@@ -0,0 +1,82 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdint.h>
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for data common to the tail of all records. The magic number
+ * used here is the binary inverse (1's complement) of the magic used in the
+ * record header; this minimizes possible confusion with other headers that may
+ * be present during recovery. The tail is used with all records that have either
+ * XIDs or data - ie any size-variable content. Currently the only records that
+ * do NOT use the tail are non-transactional dequeues and filler records.
+ *
+ * The checksum is used to verify the xid and/or data portion of the record
+ * on recovery, and excludes the header and tail.
+ *
+ * Record header info in binary format (24 bytes):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+
+ * | ~(magic) | checksum |
+ * +---+---+---+---+---+---+---+---+
+ * | serial |
+ * +---+---+---+---+---+---+---+---+
+ * | rid |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ~(magic) = 1's compliment of magic of matching record header
+ * rid = Record ID of matching record header
+ * </pre>
+ */
+typedef struct rec_tail_t {
+ uint32_t _xmagic; /**< Binary inverse (1's complement) of hdr magic number */
+ uint32_t _checksum; /**< Checksum of xid and data (excluding header itself) */
+ uint64_t _serial; /**< Serial number for this journal file */
+ uint64_t _rid; /**< Record ID (rotating 64-bit counter) */
+} rec_tail_t;
+
+static const uint16_t REC_TAIL_MAGIC_ERR_MASK = 0x01;
+static const uint16_t REC_TAIL_SERIAL_ERR_MASK = 0x02;
+static const uint16_t REC_TAIL_RID_ERR_MASK = 0x04;
+static const uint16_t REC_TAIL_CHECKSUM_ERR_MASK = 0x08;
+
+void rec_tail_init(rec_tail_t* dest, const uint32_t xmagic, const uint32_t checksum, const uint64_t serial,
+ const uint64_t rid);
+void rec_tail_copy(rec_tail_t* dest, const rec_hdr_t* src, const uint32_t checksum);
+uint16_t rec_tail_check(const rec_tail_t* tail, const rec_hdr_t* header, const uint32_t checksum);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifnedf QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c
new file mode 100644
index 0000000000..58d4cdebe4
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "txn_hdr.h"
+
+void txn_hdr_init(txn_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t xidsize) {
+ rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+ dest->_xidsize = xidsize;
+}
+
+void txn_hdr_copy(txn_hdr_t* dest, const txn_hdr_t* src) {
+ rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+ dest->_xidsize = src->_xidsize;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h
new file mode 100644
index 0000000000..442a1d373d
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h
@@ -0,0 +1,72 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for transaction commit and abort records.
+ *
+ * Struct for local and DTX commit and abort records. Only the magic distinguishes between them.
+ * Since this record must be used in the context of a valid XID, the xidsize field must not be
+ * zero. Immediately following this record is the XID itself which is xidsize bytes long,
+ * followed by a rec_tail.
+ *
+ * Note that this record had its own rid distinct from the rids of the record(s) making up the
+ * transaction it is committing or aborting.
+ *
+ * Record header info in binary format (32 bytes):
+ * <pre>
+ * 0 7
+ * +---+---+---+---+---+---+---+---+ -+
+ * | magic | ver | flags | |
+ * +---+---+---+---+---+---+---+---+ |
+ * | serial | | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+ |
+ * | rid | |
+ * +---+---+---+---+---+---+---+---+ -+
+ * | xidsize |
+ * +---+---+---+---+---+---+---+---+
+ * </pre>
+ */
+typedef struct txn_hdr_t {
+ rec_hdr_t _rhdr; /**< Common record header struct */
+ uint64_t _xidsize; /**< XID size */
+} txn_hdr_t;
+
+void txn_hdr_init(txn_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+ const uint64_t serial, const uint64_t rid, const uint64_t xidsize);
+void txn_hdr_copy(txn_hdr_t* dest, const txn_hdr_t* src);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp b/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp
new file mode 100644
index 0000000000..1ff18da663
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp
@@ -0,0 +1,1086 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/wmgr.h"
+
+#include <cassert>
+#include "qpid/linearstore/journal/aio_callback.h"
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/data_tok.h"
+#include "qpid/linearstore/journal/jcntl.h"
+#include "qpid/linearstore/journal/JournalFile.h"
+#include "qpid/linearstore/journal/LinearFileController.h"
+#include "qpid/linearstore/journal/utils/file_hdr.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+wmgr::wmgr(jcntl* jc,
+ enq_map& emap,
+ txn_map& tmap,
+ LinearFileController& lfc):
+ pmgr(jc, emap, tmap),
+ _lfc(lfc),
+ _max_dtokpp(0),
+ _max_io_wait_us(0),
+ _cached_offset_dblks(0),
+ _enq_busy(false),
+ _deq_busy(false),
+ _abort_busy(false),
+ _commit_busy(false),
+ _txn_pending_map()
+{}
+
+wmgr::wmgr(jcntl* jc,
+ enq_map& emap,
+ txn_map& tmap,
+ LinearFileController& lfc,
+ const uint32_t max_dtokpp,
+ const uint32_t max_iowait_us):
+ pmgr(jc, emap, tmap),
+ _lfc(lfc),
+ _max_dtokpp(max_dtokpp),
+ _max_io_wait_us(max_iowait_us),
+ _cached_offset_dblks(0),
+ _enq_busy(false),
+ _deq_busy(false),
+ _abort_busy(false),
+ _commit_busy(false),
+ _txn_pending_map()
+{}
+
+wmgr::~wmgr()
+{
+ wmgr::clean();
+}
+
+void
+wmgr::initialize(aio_callback* const cbp,
+ const uint32_t wcache_pgsize_sblks,
+ const uint16_t wcache_num_pages,
+ const uint32_t max_dtokpp,
+ const uint32_t max_iowait_us,
+ std::size_t end_offset)
+{
+ _enq_busy = false;
+ _deq_busy = false;
+ _abort_busy = false;
+ _commit_busy = false;
+ _max_dtokpp = max_dtokpp;
+ _max_io_wait_us = max_iowait_us;
+
+ initialize(cbp, wcache_pgsize_sblks, wcache_num_pages);
+
+ if (end_offset)
+ {
+ if(!aio::is_aligned((const void*)end_offset, QLS_AIO_ALIGN_BOUNDARY_BYTES)) {
+ std::ostringstream oss;
+ oss << "Recovery using misaligned end_offset (0x" << std::hex << end_offset << std::dec << ")" << std::endl;
+ throw jexception(jerrno::JERR_WMGR_NOTSBLKALIGNED, oss.str(), "wmgr", "initialize");
+ }
+ const uint32_t wr_pg_size_dblks = _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS;
+ uint32_t data_dblks = (end_offset / QLS_DBLK_SIZE_BYTES) - (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS); // exclude file header
+ _pg_cntr = data_dblks / wr_pg_size_dblks; // Must be set to get file rotation synchronized (this is determined by value of _pg_cntr)
+ _pg_offset_dblks = data_dblks - (_pg_cntr * wr_pg_size_dblks);
+ }
+}
+
+iores
+wmgr::enqueue(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len,
+ const bool tpc_flag,
+ const bool transient,
+ const bool external)
+{
+//std::cout << _lfc.status(10) << std::endl;
+ if (xid_len)
+ assert(xid_ptr != 0);
+
+ if (_deq_busy || _abort_busy || _commit_busy) {
+ std::ostringstream oss;
+ oss << "RHM_IORES_BUSY: enqueue while part way through another op:";
+ oss << " _deq_busy=" << (_deq_busy?"T":"F");
+ oss << " _abort_busy=" << (_abort_busy?"T":"F");
+ oss << " _commit_busy=" << (_commit_busy?"T":"F");
+ throw jexception(oss.str()); // TODO: complete exception
+ }
+
+ if (this_data_len != tot_data_len && !external) {
+ throw jexception("RHM_IORES_NOTIMPL: partial enqueues not implemented"); // TODO: complete exception;
+ }
+
+ iores res = pre_write_check(WMGR_ENQUEUE, dtokp, xid_len, tot_data_len, external);
+ if (res != RHM_IORES_SUCCESS)
+ return res;
+
+ bool cont = false;
+ if (_enq_busy) // If enqueue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT
+ {
+ if (dtokp->wstate() == data_tok::ENQ_PART)
+ cont = true;
+ else
+ {
+ std::ostringstream oss;
+ oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_ENQDISCONT, oss.str(), "wmgr", "enqueue");
+ }
+ }
+
+ uint64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _lfc.getNextRecordId();
+ _enq_rec.reset(_lfc.getCurrentSerial(), rid, data_buff, tot_data_len, xid_ptr, xid_len, transient, external);
+ if (!cont)
+ {
+ dtokp->set_rid(rid);
+ dtokp->set_dequeue_rid(0);
+ if (xid_len)
+ dtokp->set_xid(xid_ptr, xid_len);
+ else
+ dtokp->clear_xid();
+ _enq_busy = true;
+ }
+//std::cout << "---+++ wmgr::enqueue() ENQ rid=0x" << std::hex << rid << " po=0x" << _pg_offset_dblks << " cs=0x" << (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) << " " << std::dec << std::flush; // DEBUG
+ bool done = false;
+ Checksum checksum;
+ while (!done)
+ {
+//std::cout << "*" << std::flush; // DEBUG
+ assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS);
+ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES);
+ uint32_t data_offs_dblks = dtokp->dblocks_written();
+ uint32_t ret = _enq_rec.encode(wptr, data_offs_dblks,
+ (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum);
+
+ // Remember fid which contains the record header in case record is split over several files
+ if (data_offs_dblks == 0) {
+ dtokp->set_fid(_lfc.getCurrentFileSeqNum());
+ }
+ _pg_offset_dblks += ret;
+ _cached_offset_dblks += ret;
+ dtokp->incr_dblocks_written(ret);
+ dtokp->incr_pg_cnt();
+ _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp);
+
+ // Is the encoding of this record complete?
+ if (dtokp->dblocks_written() >= _enq_rec.rec_size_dblks())
+ {
+//std::cout << "!" << std::flush; // DEBUG
+ // TODO: Incorrect - must set state to ENQ_CACHED; ENQ_SUBM is set when AIO returns.
+ dtokp->set_wstate(data_tok::ENQ_SUBM);
+ dtokp->set_dsize(tot_data_len);
+ // Only add this data token to page token list when submit is complete, this way
+ // long multi-page messages have their token on the page containing the END of the
+ // message. AIO callbacks will then only process this token when entire message is
+ // enqueued.
+ _lfc.incrEnqueuedRecordCount(dtokp->fid());
+//std::cout << "[0x" << std::hex << _lfc.getEnqueuedRecordCount(dtokp->fid()) << std::dec << std::flush; // DEBUG
+
+ if (xid_len) // If part of transaction, add to transaction map
+ {
+ std::string xid((const char*)xid_ptr, xid_len);
+ _tmap.insert_txn_data(xid, txn_data_t(rid, 0, dtokp->fid(), 0, true, tpc_flag, false));
+ }
+ else
+ {
+ if (_emap.insert_pfid(rid, dtokp->fid(), 0) < enq_map::EMAP_OK) // fail
+ {
+ // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID.
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << rid << " _pfid=0x" << dtokp->fid();
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "enqueue");
+ }
+ }
+
+ done = true;
+ } else {
+//std::cout << "$" << std::flush; // DEBUG
+ dtokp->set_wstate(data_tok::ENQ_PART);
+ }
+
+ file_header_check(rid, cont, _enq_rec.rec_size_dblks() - data_offs_dblks);
+ flush_check(res, cont, done, rid);
+ }
+ if (dtokp->wstate() >= data_tok::ENQ_SUBM)
+ _enq_busy = false;
+//std::cout << " res=" << iores_str(res) << " _enq_busy=" << (_enq_busy?"T":"F") << std::endl << std::flush; // DEBUG
+ return res;
+}
+
+iores
+wmgr::dequeue(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len,
+ const bool tpc_flag,
+ const bool txn_coml_commit)
+{
+ if (xid_len)
+ assert(xid_ptr != 0);
+
+ if (_enq_busy || _abort_busy || _commit_busy) {
+ std::ostringstream oss;
+ oss << "RHM_IORES_BUSY: dequeue while part way through another op:";
+ oss << " _enq_busy=" << (_enq_busy?"T":"F");
+ oss << " _abort_busy=" << (_abort_busy?"T":"F");
+ oss << " _commit_busy=" << (_commit_busy?"T":"F");
+ throw jexception(oss.str()); // TODO: complete exception
+ }
+
+ iores res = pre_write_check(WMGR_DEQUEUE, dtokp);
+ if (res != RHM_IORES_SUCCESS)
+ return res;
+
+ bool cont = false;
+ if (_deq_busy) // If dequeue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT
+ {
+ if (dtokp->wstate() == data_tok::DEQ_PART)
+ cont = true;
+ else
+ {
+ std::ostringstream oss;
+ oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "dequeue");
+ }
+ }
+
+ const bool ext_rid = dtokp->external_rid();
+ uint64_t rid = (ext_rid | cont) ? dtokp->rid() : _lfc.getNextRecordId();
+ uint64_t dequeue_rid = (ext_rid | cont) ? dtokp->dequeue_rid() : dtokp->rid();
+ _deq_rec.reset(_lfc.getCurrentSerial(), rid, dequeue_rid, xid_ptr, xid_len, txn_coml_commit);
+ if (!cont)
+ {
+ if (!ext_rid)
+ {
+ dtokp->set_rid(rid);
+ dtokp->set_dequeue_rid(dequeue_rid);
+ }
+ if (xid_len)
+ dtokp->set_xid(xid_ptr, xid_len);
+ else
+ dtokp->clear_xid();
+ dequeue_check(dtokp->xid(), dequeue_rid);
+ dtokp->set_dblocks_written(0); // Reset dblks_written from previous op
+ _deq_busy = true;
+ }
+//std::cout << "---+++ wmgr::dequeue() DEQ rid=0x" << std::hex << rid << " drid=0x" << dequeue_rid << " " << std::dec << std::flush; // DEBUG
+ std::string xid((const char*)xid_ptr, xid_len);
+ bool done = false;
+ Checksum checksum;
+ while (!done)
+ {
+//std::cout << "*" << std::flush; // DEBUG
+ assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS);
+ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES);
+ uint32_t data_offs_dblks = dtokp->dblocks_written();
+ uint32_t ret = _deq_rec.encode(wptr, data_offs_dblks,
+ (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum);
+
+ if (data_offs_dblks == 0) {
+ uint64_t fid;
+ short eres = _emap.get_pfid(dtokp->dequeue_rid(), fid);
+ if (eres == enq_map::EMAP_OK) {
+ dtokp->set_fid(fid);
+ } else if (xid_len > 0) {
+ txn_data_list_t tdl = _tmap.get_tdata_list(xid);
+ bool found = false;
+ for (tdl_const_itr_t i=tdl.begin(); i!=tdl.end() && !found; ++i) {
+ if (i->rid_ == dtokp->dequeue_rid()) {
+ found = true;
+ dtokp->set_fid(i->fid_);
+ break;
+ }
+ }
+ if (!found) {
+ throw jexception("rid found in neither emap nor tmap, transactional");
+ }
+ } else {
+ throw jexception("rid not found in emap, non-transactional");
+ }
+ }
+ _pg_offset_dblks += ret;
+ _cached_offset_dblks += ret;
+ dtokp->incr_dblocks_written(ret);
+ dtokp->incr_pg_cnt();
+ _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp);
+
+ // Is the encoding of this record complete?
+ if (dtokp->dblocks_written() >= _deq_rec.rec_size_dblks())
+ {
+//std::cout << "!" << std::flush; // DEBUG
+ // TODO: Incorrect - must set state to ENQ_CACHED; ENQ_SUBM is set when AIO returns.
+ dtokp->set_wstate(data_tok::DEQ_SUBM);
+
+ if (xid_len) // If part of transaction, add to transaction map
+ {
+ // If the enqueue is part of a pending txn, it will not yet be in emap
+ _emap.lock(dequeue_rid); // ignore rid not found error
+ std::string xid((const char*)xid_ptr, xid_len);
+ _tmap.insert_txn_data(xid, txn_data_t(rid, dequeue_rid, dtokp->fid(), 0, false, tpc_flag, false));
+ }
+ else
+ {
+ uint64_t fid;
+ short eres = _emap.get_remove_pfid(dtokp->dequeue_rid(), fid);
+ if (eres < enq_map::EMAP_OK) // fail
+ {
+ if (eres == enq_map::EMAP_RID_NOT_FOUND)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "emap: rid=0x" << rid;
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "dequeue");
+ }
+ if (eres == enq_map::EMAP_LOCKED)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << rid;
+ throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue");
+ }
+ }
+ }
+
+ done = true;
+ } else {
+//std::cout << "$" << std::flush; // DEBUG
+ dtokp->set_wstate(data_tok::DEQ_PART);
+ }
+
+ file_header_check(rid, cont, _deq_rec.rec_size_dblks() - data_offs_dblks);
+ flush_check(res, cont, done, rid);
+ }
+ if (dtokp->wstate() >= data_tok::DEQ_SUBM)
+ _deq_busy = false;
+//std::cout << " res=" << iores_str(res) << " _deq_busy=" << (_deq_busy?"T":"F") << std::endl << std::flush; // DEBUG
+ return res;
+}
+
+iores
+wmgr::abort(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len)
+{
+ // commit and abort MUST have a valid xid
+ assert(xid_ptr != 0 && xid_len > 0);
+
+ if (_enq_busy || _deq_busy || _commit_busy) {
+ std::ostringstream oss;
+ oss << "RHM_IORES_BUSY: abort while part way through another op:";
+ oss << " _enq_busy=" << (_enq_busy?"T":"F");
+ oss << " _deq_busy=" << (_deq_busy?"T":"F");
+ oss << " _commit_busy=" << (_commit_busy?"T":"F");
+ throw jexception(oss.str()); // TODO: complete exception
+ }
+
+ iores res = pre_write_check(WMGR_ABORT, dtokp);
+ if (res != RHM_IORES_SUCCESS)
+ return res;
+
+ bool cont = false;
+ if (_abort_busy) // If abort() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT
+ {
+ if (dtokp->wstate() == data_tok::ABORT_PART)
+ cont = true;
+ else
+ {
+ std::ostringstream oss;
+ oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "abort");
+ }
+ }
+
+ uint64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _lfc.getNextRecordId();
+ _txn_rec.reset(false, _lfc.getCurrentSerial(), rid, xid_ptr, xid_len);
+ if (!cont)
+ {
+ dtokp->set_rid(rid);
+ dtokp->set_dequeue_rid(0);
+ dtokp->set_xid(xid_ptr, xid_len);
+ dtokp->set_dblocks_written(0); // Reset dblks_written from previous op
+ _abort_busy = true;
+ }
+ bool done = false;
+ Checksum checksum;
+ while (!done)
+ {
+ assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS);
+ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES);
+ uint32_t data_offs_dblks = dtokp->dblocks_written();
+ uint32_t ret = _txn_rec.encode(wptr, data_offs_dblks,
+ (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum);
+
+ // Remember fid which contains the record header in case record is split over several files
+ if (data_offs_dblks == 0)
+ dtokp->set_fid(_lfc.getCurrentFileSeqNum());
+ _pg_offset_dblks += ret;
+ _cached_offset_dblks += ret;
+ dtokp->incr_dblocks_written(ret);
+ dtokp->incr_pg_cnt();
+ _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp);
+
+ // Is the encoding of this record complete?
+ if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks())
+ {
+ dtokp->set_wstate(data_tok::ABORT_SUBM);
+
+ // Delete this txn from tmap, unlock any locked records in emap
+ std::string xid((const char*)xid_ptr, xid_len);
+ txn_data_list_t tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found
+ fidl_t fidl;
+ for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++)
+ {
+ if (!itr->enq_flag_)
+ _emap.unlock(itr->drid_); // ignore rid not found error
+ if (itr->enq_flag_) {
+ fidl.push_back(itr->fid_);
+ }
+ }
+ std::pair<pending_txn_map_itr_t, bool> res = _txn_pending_map.insert(std::pair<std::string, fidl_t>(xid, fidl));
+ if (!res.second)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\"";
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "abort");
+ }
+
+ done = true;
+ } else {
+ dtokp->set_wstate(data_tok::ABORT_PART);
+ }
+
+ file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks);
+ flush_check(res, cont, done, rid);
+ }
+ if (dtokp->wstate() >= data_tok::ABORT_SUBM)
+ _abort_busy = false;
+ return res;
+}
+
+iores
+wmgr::commit(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len)
+{
+ // commit and abort MUST have a valid xid
+ assert(xid_ptr != 0 && xid_len > 0);
+
+ if (_enq_busy || _deq_busy || _abort_busy) {
+ std::ostringstream oss;
+ oss << "RHM_IORES_BUSY: commit while part way through another op:";
+ oss << " _enq_busy=" << (_enq_busy?"T":"F");
+ oss << " _deq_busy=" << (_deq_busy?"T":"F");
+ oss << " _abort_busy=" << (_abort_busy?"T":"F");
+ throw jexception(oss.str()); // TODO: complete exception
+ }
+
+ iores res = pre_write_check(WMGR_COMMIT, dtokp);
+ if (res != RHM_IORES_SUCCESS)
+ return res;
+
+ bool cont = false;
+ if (_commit_busy) // If commit() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT
+ {
+ if (dtokp->wstate() == data_tok::COMMIT_PART)
+ cont = true;
+ else
+ {
+ std::ostringstream oss;
+ oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "commit");
+ }
+ }
+
+ uint64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _lfc.getNextRecordId();
+ _txn_rec.reset(true, _lfc.getCurrentSerial(), rid, xid_ptr, xid_len);
+ if (!cont)
+ {
+ dtokp->set_rid(rid);
+ dtokp->set_dequeue_rid(0);
+ dtokp->set_xid(xid_ptr, xid_len);
+ dtokp->set_dblocks_written(0); // Reset dblks_written from previous op
+ _commit_busy = true;
+ }
+ bool done = false;
+ Checksum checksum;
+ while (!done)
+ {
+ assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS);
+ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES);
+ uint32_t data_offs_dblks = dtokp->dblocks_written();
+ uint32_t ret = _txn_rec.encode(wptr, data_offs_dblks,
+ (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum);
+
+ // Remember fid which contains the record header in case record is split over several files
+ if (data_offs_dblks == 0)
+ dtokp->set_fid(_lfc.getCurrentFileSeqNum());
+ _pg_offset_dblks += ret;
+ _cached_offset_dblks += ret;
+ dtokp->incr_dblocks_written(ret);
+ dtokp->incr_pg_cnt();
+ _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp);
+
+ // Is the encoding of this record complete?
+ if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks())
+ {
+ dtokp->set_wstate(data_tok::COMMIT_SUBM);
+
+ // Delete this txn from tmap, process records into emap
+ std::string xid((const char*)xid_ptr, xid_len);
+ txn_data_list_t tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found
+ fidl_t fidl;
+ for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++)
+ {
+ if (itr->enq_flag_) // txn enqueue
+ {
+ if (_emap.insert_pfid(itr->rid_, itr->fid_, 0) < enq_map::EMAP_OK) // fail
+ {
+ // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID.
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << itr->rid_ << " _pfid=0x" << itr->fid_;
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit");
+ }
+ }
+ else // txn dequeue
+ {
+ uint64_t fid;
+ short eres = _emap.get_remove_pfid(itr->drid_, fid, true);
+ if (eres < enq_map::EMAP_OK) // fail
+ {
+ if (eres == enq_map::EMAP_RID_NOT_FOUND)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "emap: rid=0x" << itr->drid_;
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "commit");
+ }
+ if (eres == enq_map::EMAP_LOCKED)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "rid=0x" << itr->drid_;
+ throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "commit");
+ }
+ }
+ fidl.push_back(fid);
+ }
+ }
+ std::pair<pending_txn_map_itr_t, bool> res = _txn_pending_map.insert(std::pair<std::string, fidl_t>(xid, fidl));
+ if (!res.second)
+ {
+ std::ostringstream oss;
+ oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\"";
+ throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit");
+ }
+
+ done = true;
+ } else {
+ dtokp->set_wstate(data_tok::COMMIT_PART);
+ }
+
+ file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks);
+ flush_check(res, cont, done, rid);
+ }
+ if (dtokp->wstate() >= data_tok::COMMIT_SUBM)
+ _commit_busy = false;
+ return res;
+}
+
+void
+wmgr::file_header_check(const uint64_t rid,
+ const bool cont,
+ const uint32_t rec_dblks_rem)
+{
+ if (_lfc.isEmpty()) // File never written (i.e. no header or data)
+ {
+//std::cout << "e" << std::flush;
+ std::size_t fro = 0;
+ if (cont) {
+ bool file_fit = rec_dblks_rem <= _lfc.dataSize_sblks() * QLS_SBLK_SIZE_DBLKS; // Will fit within this journal file
+ bool file_full = rec_dblks_rem == _lfc.dataSize_sblks() * QLS_SBLK_SIZE_DBLKS; // Will exactly fill this journal file
+ if (file_fit && !file_full) {
+ fro = (rec_dblks_rem + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS)) * QLS_DBLK_SIZE_BYTES;
+ }
+ } else {
+ fro = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES;
+ }
+ _lfc.asyncFileHeaderWrite(_ioctx, 0, rid, fro);
+ _aio_evt_rem++;
+ }
+}
+
+void
+wmgr::flush_check(iores& res,
+ bool& cont,
+ bool& done, const uint64_t /*rid*/) // DEBUG
+{
+ // Is page is full, flush
+ if (_pg_offset_dblks >= _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS)
+ {
+//std::cout << "^" << _pg_offset_dblks << ">=" << (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) << std::flush;
+ res = write_flush();
+ assert(res == RHM_IORES_SUCCESS);
+
+ if (_page_cb_arr[_pg_index]._state == AIO_PENDING && !done)
+ {
+ res = RHM_IORES_PAGE_AIOWAIT;
+ done = true;
+ }
+
+ // If file is full, rotate to next file
+ uint32_t dataSize_pgs = _lfc.dataSize_sblks() / _cache_pgsize_sblks;
+ if (_pg_cntr >= dataSize_pgs)
+ {
+//std::cout << _pg_cntr << ">=" << fileSize_pgs << std::flush;
+ get_next_file();
+ if (!done) {
+ cont = true;
+ }
+//std::cout << "***** wmgr::flush_check(): GET NEXT FILE: rid=0x" << std::hex << rid << std::dec << " res=" << iores_str(res) << " cont=" << (cont?"T":"F") << " done=" << (done?"T":"F") << std::endl; // DEBUG
+ }
+ }
+}
+
+iores
+wmgr::flush()
+{
+ iores res = write_flush();
+ uint32_t dataSize_pgs = _lfc.dataSize_sblks() / _cache_pgsize_sblks;
+ if (res == RHM_IORES_SUCCESS && _pg_cntr >= dataSize_pgs) {
+ get_next_file();
+ }
+ return res;
+}
+
+iores
+wmgr::write_flush()
+{
+ iores res = RHM_IORES_SUCCESS;
+ // Don't bother flushing an empty page or one that is still in state AIO_PENDING
+ if (_cached_offset_dblks)
+ {
+ if (_page_cb_arr[_pg_index]._state == AIO_PENDING) {
+//std::cout << "#" << std::flush; // DEBUG
+ res = RHM_IORES_PAGE_AIOWAIT;
+ } else {
+ if (_page_cb_arr[_pg_index]._state != IN_USE)
+ {
+ std::ostringstream oss;
+ oss << "pg_index=" << _pg_index << " state=" << _page_cb_arr[_pg_index].state_str();
+ throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", "write_flush");
+ }
+
+ // Send current page using AIO
+
+ // In manual flushes, dblks may not coincide with sblks, add filler records ("RHMx") if necessary.
+ dblk_roundup();
+
+ std::size_t pg_offs = (_pg_offset_dblks - _cached_offset_dblks) * QLS_DBLK_SIZE_BYTES;
+ aio_cb* aiocbp = &_aio_cb_arr[_pg_index];
+ _lfc.asyncPageWrite(_ioctx, aiocbp, (char*)_page_ptr_arr[_pg_index] + pg_offs, _cached_offset_dblks);
+ _page_cb_arr[_pg_index]._state = AIO_PENDING;
+ _aio_evt_rem++;
+//std::cout << "." << _aio_evt_rem << std::flush; // DEBUG
+ _cached_offset_dblks = 0;
+ _jc->instr_incr_outstanding_aio_cnt();
+
+ rotate_page(); // increments _pg_index, resets _pg_offset_dblks if req'd
+ if (_page_cb_arr[_pg_index]._state == UNUSED)
+ _page_cb_arr[_pg_index]._state = IN_USE;
+ }
+ }
+ get_events(0, false);
+ if (_page_cb_arr[_pg_index]._state == UNUSED)
+ _page_cb_arr[_pg_index]._state = IN_USE;
+ return res;
+}
+
+void
+wmgr::get_next_file()
+{
+ _pg_cntr = 0;
+//std::cout << "&&&&& wmgr::get_next_file(): " << status_str() << std::flush << std::endl; // DEBUG
+ _lfc.getNextJournalFile();
+}
+
+int32_t
+wmgr::get_events(timespec* const timeout,
+ bool flush)
+{
+ if (_aio_evt_rem == 0) // no events to get
+ return 0;
+
+ int ret = 0;
+ if ((ret = aio::getevents(_ioctx, flush ? _aio_evt_rem : 1, _aio_evt_rem, _aio_event_arr, timeout)) < 0)
+ {
+ if (ret == -EINTR) // Interrupted by signal
+ return 0;
+ std::ostringstream oss;
+ oss << "io_getevents() failed: " << std::strerror(-ret) << " (" << ret << ") ctx_id=" << _ioctx;
+ oss << " min_nr=" << (flush ? _aio_evt_rem : 1) << " nr=" << _aio_evt_rem;
+ throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events");
+ }
+
+ if (ret == 0 && timeout)
+ return jerrno::AIO_TIMEOUT;
+
+ int32_t tot_data_toks = 0;
+ for (int i=0; i<ret; i++) // Index of returned AIOs
+ {
+ if (_aio_evt_rem == 0)
+ {
+ std::ostringstream oss;
+ oss << "_aio_evt_rem; evt " << (i + 1) << " of " << ret;
+ throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "wmgr", "get_events");
+ }
+ _aio_evt_rem--;
+//std::cout << "'" << _aio_evt_rem; // DEBUG
+ aio_cb* aiocbp = _aio_event_arr[i].obj; // This I/O control block (iocb)
+ page_cb* pcbp = (page_cb*)(aiocbp->data); // This page control block (pcb)
+ long aioret = (long)_aio_event_arr[i].res;
+ if (aioret < 0) {
+ std::ostringstream oss;
+ oss << "AIO write operation failed: " << std::strerror(-aioret) << " (" << aioret << ")" << std::endl;
+ oss << " data=" << _aio_event_arr[i].data << std::endl;
+ oss << " obj=" << _aio_event_arr[i].obj << std::endl;
+ oss << " res=" << _aio_event_arr[i].res << std::endl;
+ oss << " res2=" << _aio_event_arr[i].res2 << std::endl;
+ oss << " iocb->data=" << aiocbp->data << std::endl;
+ oss << " iocb->key=" << aiocbp->key << std::endl;
+ oss << " iocb->aio_lio_opcode=" << aiocbp->aio_lio_opcode << std::endl;
+ oss << " iocb->aio_reqprio=" << aiocbp->aio_reqprio << std::endl;
+ oss << " iocb->aio_fildes=" << aiocbp->aio_fildes << std::endl;
+ oss << " iocb->u.c.buf=" << aiocbp->u.c.buf << std::endl;
+ oss << " iocb->u.c.nbytes=0x" << std::hex << aiocbp->u.c.nbytes << std::dec << " (" << aiocbp->u.c.nbytes << ")" << std::endl;
+ oss << " iocb->u.c.offset=0x" << std::hex << aiocbp->u.c.offset << std::dec << " (" << aiocbp->u.c.offset << ")" << std::endl;
+ oss << " iocb->u.c.flags=0x" << std::hex << aiocbp->u.c.flags << std::dec << " (" << aiocbp->u.c.flags << ")" << std::endl;
+ oss << " iocb->u.c.resfd=" << aiocbp->u.c.resfd << std::endl;
+ if (pcbp) {
+ oss << " Page Control Block: (iocb->data):" << std::endl;
+ oss << " pcb.index=" << pcbp->_index << std::endl;
+ oss << " pcb.state=" << pcbp->_state << " (" << pmgr::page_state_str(pcbp->_state) << ")" << std::endl;
+ oss << " pcb.frid=0x" << std::hex << pcbp->_frid << std::dec << std::endl;
+ oss << " pcb.wdblks=0x" << std::hex << pcbp->_wdblks << std::dec << std::endl;
+ oss << " pcb.pdtokl.size=" << pcbp->_pdtokl->size() << std::endl;
+ oss << " pcb.pbuff=" << pcbp->_pbuff << std::endl;
+ oss << " JournalFile (pcb.jfp):" << std::endl;
+ oss << pcbp->_jfp->status_str(6) << std::endl;
+ } else {
+ file_hdr_t* fhp = (file_hdr_t*)aiocbp->u.c.buf;
+ oss << "fnum=" << fhp->_file_number;
+ oss << " qname=" << std::string((char*)fhp + sizeof(file_hdr_t), fhp->_queue_name_len);
+ }
+ throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events");
+ }
+ if (pcbp) // Page writes have pcb
+ {
+//std::cout << "p"; // DEBUG
+ uint32_t s = pcbp->_pdtokl->size();
+ std::vector<data_tok*> dtokl;
+ dtokl.reserve(s);
+ for (uint32_t k=0; k<s; k++)
+ {
+ data_tok* dtokp = pcbp->_pdtokl->at(k);
+ if (dtokp->decr_pg_cnt() == 0)
+ {
+ pending_txn_map_itr_t it;
+ switch (dtokp->wstate())
+ {
+ case data_tok::ENQ_SUBM:
+ dtokl.push_back(dtokp);
+ tot_data_toks++;
+ dtokp->set_wstate(data_tok::ENQ);
+ if (dtokp->has_xid())
+ // Ignoring return value here. A non-zero return can signify that the transaction
+ // has committed or aborted, and which was completed prior to the aio returning.
+ _tmap.set_aio_compl(dtokp->xid(), dtokp->rid());
+ break;
+ case data_tok::DEQ_SUBM:
+ if (!dtokp->has_xid()) {
+ _lfc.decrEnqueuedRecordCount(dtokp->fid());
+ }
+ dtokl.push_back(dtokp);
+ tot_data_toks++;
+ dtokp->set_wstate(data_tok::DEQ);
+ if (dtokp->has_xid())
+ // Ignoring return value - see note above.
+ _tmap.set_aio_compl(dtokp->xid(), dtokp->rid());
+ break;
+ case data_tok::ABORT_SUBM:
+ dtokl.push_back(dtokp);
+ tot_data_toks++;
+ dtokp->set_wstate(data_tok::ABORTED);
+ it = _txn_pending_map.find(dtokp->xid());
+ if (it == _txn_pending_map.end())
+ {
+ std::ostringstream oss;
+ oss << std::hex << "_txn_pending_set: abort xid=\""
+ << qpid::linearstore::journal::jcntl::str2hexnum(dtokp->xid()) << "\"";
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "get_events");
+ }
+ for (fidl_itr_t i=it->second.begin(); i!=it->second.end(); ++i) {
+ _lfc.decrEnqueuedRecordCount(*i);
+ }
+ _txn_pending_map.erase(it);
+ break;
+ case data_tok::COMMIT_SUBM:
+ dtokl.push_back(dtokp);
+ tot_data_toks++;
+ dtokp->set_wstate(data_tok::COMMITTED);
+ it = _txn_pending_map.find(dtokp->xid());
+ if (it == _txn_pending_map.end())
+ {
+ std::ostringstream oss;
+ oss << std::hex << "_txn_pending_set: commit xid=\""
+ << qpid::linearstore::journal::jcntl::str2hexnum(dtokp->xid()) << "\"";
+ throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "get_events");
+ }
+ for (fidl_itr_t i=it->second.begin(); i!=it->second.end(); ++i) {
+ _lfc.decrEnqueuedRecordCount(*i);
+ }
+ _txn_pending_map.erase(it);
+ break;
+ case data_tok::ENQ_PART:
+ case data_tok::DEQ_PART:
+ case data_tok::ABORT_PART:
+ case data_tok::COMMIT_PART:
+ // ignore these
+ break;
+ default:
+ // throw for anything else
+ std::ostringstream oss;
+ oss << "dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr",
+ "get_events");
+ }
+ }
+ }
+
+ // Increment the completed write offset
+ // NOTE: We cannot use _wrfc here, as it may have rotated since submitting count.
+ // Use stored pointer to fcntl in the pcb instead.
+ pcbp->_jfp->addCompletedDblkCount(pcbp->_wdblks);
+ pcbp->_jfp->decrOutstandingAioOperationCount();
+ _jc->instr_decr_outstanding_aio_cnt();
+
+ // Clean up this pcb's data_tok list
+ pcbp->_pdtokl->clear();
+ pcbp->_state = UNUSED;
+//std::cout << "c" << pcbp->_index << pcbp->state_str(); // DEBUG
+
+ // Perform AIO return callback
+ if (_cbp && tot_data_toks)
+ _cbp->wr_aio_cb(dtokl);
+ }
+ else // File header writes have no pcb
+ {
+//std::cout << "f"; // DEBUG
+ file_hdr_t* fhp = (file_hdr_t*)aiocbp->u.c.buf;
+ _lfc.addWriteCompletedDblkCount(fhp->_file_number, QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS);
+ _lfc.decrOutstandingAioOperationCount(fhp->_file_number);
+ }
+ }
+
+ return tot_data_toks;
+}
+
+bool
+wmgr::is_txn_synced(const std::string& xid)
+{
+ // Ignore xid not found error here
+ if (_tmap.is_txn_synced(xid) == txn_map::TMAP_NOT_SYNCED)
+ return false;
+ // Check for outstanding commit/aborts
+ pending_txn_map_itr_t it = _txn_pending_map.find(xid);
+ return it == _txn_pending_map.end();
+}
+
+void
+wmgr::initialize(aio_callback* const cbp,
+ const uint32_t wcache_pgsize_sblks,
+ const uint16_t wcache_num_pages)
+{
+
+ pmgr::initialize(cbp, wcache_pgsize_sblks, wcache_num_pages);
+ wmgr::clean();
+ _page_cb_arr[0]._state = IN_USE;
+ _cached_offset_dblks = 0;
+ _enq_busy = false;
+}
+
+iores
+wmgr::pre_write_check(const _op_type op,
+ const data_tok* const dtokp,
+ const std::size_t /*xidsize*/,
+ const std::size_t /*dsize*/,
+ const bool /*external*/) const
+{
+ // Check status of current file
+ // TODO: Replace for LFC
+/*
+ if (!_wrfc.is_wr_reset())
+ {
+ if (!_wrfc.wr_reset())
+ return RHM_IORES_FULL;
+ }
+*/
+
+ // Check status of current page is ok for writing
+ if (_page_cb_arr[_pg_index]._state != IN_USE)
+ {
+ if (_page_cb_arr[_pg_index]._state == UNUSED)
+ _page_cb_arr[_pg_index]._state = IN_USE;
+ else if (_page_cb_arr[_pg_index]._state == AIO_PENDING)
+ return RHM_IORES_PAGE_AIOWAIT;
+ else
+ {
+ std::ostringstream oss;
+ oss << "jrnl=" << _jc->id() << " op=" << _op_str[op];
+ oss << " index=" << _pg_index << " pg_state=" << _page_cb_arr[_pg_index].state_str();
+ throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", "pre_write_check");
+ }
+ }
+
+ // operation-specific checks
+ switch (op)
+ {
+ case WMGR_ENQUEUE:
+ {
+ if (!dtokp->is_writable())
+ {
+ std::ostringstream oss;
+ oss << "jrnl=" << _jc->id() << " op=" << _op_str[op];
+ oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr",
+ "pre_write_check");
+ }
+ }
+ break;
+ case WMGR_DEQUEUE:
+ if (!dtokp->is_dequeueable())
+ {
+ std::ostringstream oss;
+ oss << "jrnl=" << _jc->id() << " op=" << _op_str[op];
+ oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str();
+ throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr",
+ "pre_write_check");
+ }
+ break;
+ case WMGR_ABORT:
+ break;
+ case WMGR_COMMIT:
+ break;
+ }
+
+ return RHM_IORES_SUCCESS;
+}
+
+void
+wmgr::dequeue_check(const std::string& xid,
+ const uint64_t drid)
+{
+ // First check emap
+ bool found = false;
+ uint64_t fid;
+ short eres = _emap.get_pfid(drid, fid);
+ if (eres < enq_map::EMAP_OK) { // fail
+ if (eres == enq_map::EMAP_RID_NOT_FOUND) {
+ if (xid.size()) {
+ found = _tmap.data_exists(xid, drid);
+ }
+ } else if (eres == enq_map::EMAP_LOCKED) {
+ std::ostringstream oss;
+ oss << std::hex << "drid=0x" << drid;
+ throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue_check");
+ }
+ } else {
+ found = true;
+ }
+ if (!found) {
+ std::ostringstream oss;
+ oss << "jrnl=" << _jc->id() << " drid=0x" << std::hex << drid;
+ throw jexception(jerrno::JERR_WMGR_DEQRIDNOTENQ, oss.str(), "wmgr", "dequeue_check");
+ }
+}
+
+void
+wmgr::dblk_roundup()
+{
+ const uint32_t xmagic = QLS_EMPTY_MAGIC;
+ uint32_t wdblks = jrec::size_blks(_cached_offset_dblks, QLS_SBLK_SIZE_DBLKS) * QLS_SBLK_SIZE_DBLKS;
+ while (_cached_offset_dblks < wdblks)
+ {
+//std::cout << "^0x" << std::hex << _cached_offset_dblks << "<0x" << wdblks << std::dec << std::flush;
+ void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES);
+ std::memcpy(wptr, (const void*)&xmagic, sizeof(xmagic));
+#ifdef QLS_CLEAN
+ std::memset((char*)wptr + sizeof(xmagic), QLS_CLEAN_CHAR, QLS_DBLK_SIZE_BYTES - sizeof(xmagic));
+#endif
+ _pg_offset_dblks++;
+ _cached_offset_dblks++;
+ }
+}
+
+void
+wmgr::rotate_page()
+{
+//std::cout << "^^^^^ wmgr::rotate_page() " << status_str() << " pi=" << _pg_index; // DEBUG
+ if (_pg_offset_dblks >= _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS)
+ {
+ _pg_offset_dblks = 0;
+ _pg_cntr++;
+ }
+ if (++_pg_index >= _cache_num_pages)
+ _pg_index = 0;
+//std::cout << "->" << _pg_index << std::endl; // DEBUG
+}
+
+void
+wmgr::clean() {
+ // Clean up allocated memory here
+}
+
+const std::string
+wmgr::status_str() const
+{
+ std::ostringstream oss;
+ oss << "wmgr: pi=" << _pg_index << " pc=" << _pg_cntr;
+ oss << " po=" << _pg_offset_dblks << " aer=" << _aio_evt_rem;
+ oss << " edac=" << (_enq_busy?"T":"F") << (_deq_busy?"T":"F");
+ oss << (_abort_busy?"T":"F") << (_commit_busy?"T":"F");
+ oss << " ps=[";
+ for (int i=0; i<_cache_num_pages; i++)
+ {
+ switch (_page_cb_arr[i]._state)
+ {
+ case UNUSED: oss << "-"; break;
+ case IN_USE: oss << "U"; break;
+ case AIO_PENDING: oss << "A"; break;
+ default: oss << _page_cb_arr[i]._state;
+ }
+ }
+ oss << "] ";
+ return oss.str();
+}
+
+// static
+
+const char* wmgr::_op_str[] = {"enqueue", "dequeue", "abort", "commit"};
+
+}}}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/wmgr.h b/qpid/cpp/src/qpid/linearstore/journal/wmgr.h
new file mode 100644
index 0000000000..99da20bab9
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/wmgr.h
@@ -0,0 +1,156 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#ifndef QPID_LINEARSTORE_JOURNAL_WMGR_H
+#define QPID_LINEARSTORE_JOURNAL_WMGR_H
+
+#include <deque>
+#include <map>
+#include "qpid/linearstore/journal/enums.h"
+#include "qpid/linearstore/journal/pmgr.h"
+#include <vector>
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+class LinearFileController;
+
+/**
+* \brief Class for managing a write page cache of arbitrary size and number of pages.
+*
+* The write page cache works on the principle of caching the write data within a page until
+* that page is either full or flushed; this initiates a single AIO write operation to store
+* the data on disk.
+*
+* The maximum disk throughput is achieved by keeping the write operations of uniform size.
+* Waiting for a page cache to fill achieves this; and in high data volume/throughput situations
+* achieves the optimal disk throughput. Calling flush() forces a write of the current page cache
+* no matter how full it is, and disrupts the uniformity of the write operations. This should
+* normally only be done if throughput drops and there is a danger of a page of unwritten data
+* waiting around for excessive time.
+*
+* The usual tradeoff between data storage latency and throughput performance applies.
+*/
+class wmgr : public pmgr
+{
+private:
+ typedef std::vector<uint64_t> fidl_t;
+ typedef fidl_t::iterator fidl_itr_t;
+ typedef std::map<std::string, fidl_t> pending_txn_map_t;
+ typedef pending_txn_map_t::iterator pending_txn_map_itr_t;
+
+ LinearFileController& _lfc; ///< Linear File Controller ref
+ uint32_t _max_dtokpp; ///< Max data writes per page
+ uint32_t _max_io_wait_us; ///< Max wait in microseconds till submit
+ uint32_t _cached_offset_dblks; ///< Amount of unwritten data in page (dblocks)
+
+ // TODO: Convert _enq_busy etc into a proper threadsafe lock
+ // TODO: Convert to enum? Are these encodes mutually exclusive?
+ bool _enq_busy; ///< Flag true if enqueue is in progress
+ bool _deq_busy; ///< Flag true if dequeue is in progress
+ bool _abort_busy; ///< Flag true if abort is in progress
+ bool _commit_busy; ///< Flag true if commit is in progress
+
+ enum _op_type { WMGR_ENQUEUE = 0, WMGR_DEQUEUE, WMGR_ABORT, WMGR_COMMIT };
+ static const char* _op_str[];
+
+ enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding
+ deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding
+ txn_rec _txn_rec; ///< Transaction record used for encoding/decoding
+ pending_txn_map_t _txn_pending_map; ///< Set containing xids of pending commits/aborts
+
+public:
+ wmgr(jcntl* jc,
+ enq_map& emap,
+ txn_map& tmap,
+ LinearFileController& lfc);
+ wmgr(jcntl* jc,
+ enq_map& emap,
+ txn_map& tmap,
+ LinearFileController& lfc,
+ const uint32_t max_dtokpp,
+ const uint32_t max_iowait_us);
+ virtual ~wmgr();
+
+ void initialize(aio_callback* const cbp,
+ const uint32_t wcache_pgsize_sblks,
+ const uint16_t wcache_num_pages,
+ const uint32_t max_dtokpp,
+ const uint32_t max_iowait_us,
+ std::size_t end_offset);
+ iores enqueue(const void* const data_buff,
+ const std::size_t tot_data_len,
+ const std::size_t this_data_len,
+ data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len,
+ const bool tpc_flag,
+ const bool transient,
+ const bool external);
+ iores dequeue(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len,
+ const bool tpc_flag,
+ const bool txn_coml_commit);
+ iores abort(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len);
+ iores commit(data_tok* dtokp,
+ const void* const xid_ptr,
+ const std::size_t xid_len);
+ iores flush();
+ int32_t get_events(timespec* const timeout,
+ bool flush);
+ bool is_txn_synced(const std::string& xid);
+ inline bool curr_pg_blocked() const { return _page_cb_arr[_pg_index]._state != UNUSED; }
+ inline uint32_t unflushed_dblks() { return _cached_offset_dblks; }
+
+ // Debug aid
+ const std::string status_str() const;
+
+private:
+ void initialize(aio_callback* const cbp,
+ const uint32_t wcache_pgsize_sblks,
+ const uint16_t wcache_num_pages);
+ iores pre_write_check(const _op_type op,
+ const data_tok* const dtokp,
+ const std::size_t xidsize = 0,
+ const std::size_t dsize = 0,
+ const bool external = false) const;
+ void dequeue_check(const std::string& xid,
+ const uint64_t drid);
+ void file_header_check(const uint64_t rid,
+ const bool cont,
+ const uint32_t rec_dblks_rem);
+ void flush_check(iores& res,
+ bool& cont,
+ bool& done, const uint64_t rid);
+ iores write_flush();
+ void get_next_file();
+ void dblk_roundup();
+ void rotate_page();
+ void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_WMGR_H