diff options
Diffstat (limited to 'qpid/cpp/src/qpid/linearstore')
88 files changed, 15570 insertions, 0 deletions
diff --git a/qpid/cpp/src/qpid/linearstore/BindingDbt.cpp b/qpid/cpp/src/qpid/linearstore/BindingDbt.cpp new file mode 100644 index 0000000000..47738cce39 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/BindingDbt.cpp @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/BindingDbt.h" + +namespace qpid { +namespace linearstore { + +BindingDbt::BindingDbt(const qpid::broker::PersistableExchange& e, const qpid::broker::PersistableQueue& q, const std::string& k, const qpid::framing::FieldTable& a) /* Writes q's persistence id, q's name, the string k and the field table a into a newly allocated buffer and points the Dbt base at it; e is only forwarded to encodedSize(), which ignores it. */ + : data(new char[encodedSize(e, q, k, a)]), /* owned by this object; released in the destructor */ + buffer(data, encodedSize(e, q, k, a)) +{ + buffer.putLongLong(q.getPersistenceId()); + buffer.putShortString(q.getName()); + buffer.putShortString(k); + buffer.put(a); + + set_data(data); /* expose the encoded bytes and their length through the Dbt base */ + set_size(encodedSize(e, q, k, a)); +} + +BindingDbt::~BindingDbt() /* Frees the array allocated by the constructor. */ +{ + delete [] data; +} + +uint32_t BindingDbt::encodedSize(const qpid::broker::PersistableExchange& /*not used*/, const qpid::broker::PersistableQueue& q, const std::string& k, const qpid::framing::FieldTable& a) /* Returns the buffer size used by the constructor: 8 bytes for the queue id, each string's size plus one byte (presumably the short-string length prefix; TODO confirm), and a.encodedSize(). */ +{ + return 8 /*queue id*/ + q.getName().size() + 1 + k.size() + 1 + a.encodedSize(); +} + +}} diff --git a/qpid/cpp/src/qpid/linearstore/BindingDbt.h b/qpid/cpp/src/qpid/linearstore/BindingDbt.h new file mode 100644 index 0000000000..e5d61de248 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/BindingDbt.h @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_BINDINGDBT_H +#define QPID_LINEARSTORE_BINDINGDBT_H + +#include "db-inc.h" +#include "qpid/broker/PersistableExchange.h" +#include "qpid/broker/PersistableQueue.h" +#include "qpid/framing/Buffer.h" +#include "qpid/framing/FieldTable.h" + +namespace qpid{ +namespace linearstore{ + +class BindingDbt : public Dbt /* A Dbt whose data is a buffer filled at construction from a queue, a string and a field table. NOTE(review): data is an owning raw pointer and no copy constructor or assignment is declared here, so a copy would delete the same array twice; confirm instances are never copied. */ +{ + char* data; /* heap array holding the encoded record */ + qpid::framing::Buffer buffer; /* framing buffer constructed over data */ + + static uint32_t encodedSize(const qpid::broker::PersistableExchange& e, /* size in bytes of the buffer needed for the given arguments */ + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable& a); + +public: + BindingDbt(const qpid::broker::PersistableExchange& e, /* builds the encoded record from q, k and a */ + const qpid::broker::PersistableQueue& q, + const std::string& k, + const qpid::framing::FieldTable& a); + + virtual ~BindingDbt(); + +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_BINDINGDBT_H diff --git a/qpid/cpp/src/qpid/linearstore/BufferValue.cpp b/qpid/cpp/src/qpid/linearstore/BufferValue.cpp new file mode 100644 index 0000000000..5115055375 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/BufferValue.cpp @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/BufferValue.h" + +namespace qpid { +namespace linearstore { + + + +BufferValue::BufferValue(uint32_t size, uint64_t offset) /* Allocates a size-byte array and configures the Dbt with flags DB_DBT_USERMEM | DB_DBT_PARTIAL, doff = offset and dlen = ulen = size. */ + : data(new char[size]), /* owned by this object; released in the destructor */ + buffer(data, size) +{ + set_data(data); + set_size(size); + set_flags(DB_DBT_USERMEM | DB_DBT_PARTIAL); + set_doff(offset); /* NOTE(review): offset is 64-bit here; if Dbt::set_doff takes a 32-bit value, large offsets would be truncated; confirm */ + set_dlen(size); + set_ulen(size); +} + +BufferValue::BufferValue(const qpid::broker::Persistable& p) /* Encodes p into an array of p.encodedSize() bytes and exposes it as the Dbt data. */ + : data(new char[p.encodedSize()]), + buffer(data, p.encodedSize()) +{ + p.encode(buffer); + + set_data(data); + set_size(p.encodedSize()); +} + +BufferValue::~BufferValue() /* Frees the array allocated by whichever constructor ran. */ +{ + delete [] data; +} + +}} diff --git a/qpid/cpp/src/qpid/linearstore/BufferValue.h b/qpid/cpp/src/qpid/linearstore/BufferValue.h new file mode 100644 index 0000000000..daeb81306a --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/BufferValue.h @@ -0,0 +1,46 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_BUFFERVALUE_H +#define QPID_LINEARSTORE_BUFFERVALUE_H + +#include "db-inc.h" +#include "qpid/broker/Persistable.h" +#include "qpid/framing/Buffer.h" + +namespace qpid{ +namespace linearstore{ + +class BufferValue : public Dbt /* A Dbt backed by a heap array that this object allocates and frees. NOTE(review): no copy constructor or assignment is declared here, so a copy would delete the same array twice; confirm instances are never copied. */ +{ + char* data; /* heap array used as the Dbt data */ + +public: + qpid::framing::Buffer buffer; /* public framing buffer constructed over data */ + + BufferValue(uint32_t size, uint64_t offset); /* empty buffer of size bytes for a partial access starting at offset */ + BufferValue(const qpid::broker::Persistable& p); /* buffer pre-filled with the encoding of p */ + virtual ~BufferValue(); +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_BUFFERVALUE_H diff --git a/qpid/cpp/src/qpid/linearstore/Cursor.h b/qpid/cpp/src/qpid/linearstore/Cursor.h new file mode 100644 index 0000000000..0287803b21 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/Cursor.h @@ -0,0 +1,50 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_CURSOR_H +#define QPID_LINEARSTORE_CURSOR_H + +#include <boost/shared_ptr.hpp> +#include "db-inc.h" + +namespace qpid{ +namespace linearstore{ + +class Cursor /* Thin wrapper around a Dbc pointer that closes the cursor on destruction. NOTE(review): the class is copyable as written, and two copies would each close the same Dbc; confirm it is never copied. */ +{ + Dbc* cursor; /* null until open() succeeds and again after close() */ +public: + typedef boost::shared_ptr<Db> db_ptr; + + Cursor() : cursor(0) {} + virtual ~Cursor() { if(cursor) cursor->close(); } /* NOTE(review): if Dbc::close() can throw, the exception would leave a destructor; confirm */ + + void open(db_ptr db, DbTxn* txn, uint32_t flags = 0) { db->cursor(txn, &cursor, flags); } /* the result of db->cursor() is discarded; presumably errors surface as exceptions (TODO confirm) */ + void close() { if(cursor) cursor->close(); cursor = 0; } /* safe to call when no cursor is open */ + Dbc* get() { return cursor; } + Dbc* operator->() { return cursor; } + bool next(Dbt& key, Dbt& value) { return cursor->get(&key, &value, DB_NEXT) == 0; } /* true when the DB_NEXT get returns 0; dereferences cursor without a null check, so open() must have been called first */ + bool current(Dbt& key, Dbt& value) { return cursor->get(&key, &value, DB_CURRENT) == 0; } /* same contract as next(), using DB_CURRENT */ +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_CURSOR_H diff --git a/qpid/cpp/src/qpid/linearstore/DataTokenImpl.cpp b/qpid/cpp/src/qpid/linearstore/DataTokenImpl.cpp new file mode 100644 index 0000000000..0b1f3d7941 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/DataTokenImpl.cpp @@ -0,0 +1,28 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/DataTokenImpl.h" + +using namespace qpid::linearstore; + +DataTokenImpl::DataTokenImpl():data_tok() {} /* Default-constructs the data_tok base; no other initialisation is written here. */ + +DataTokenImpl::~DataTokenImpl() {} /* Empty body: nothing is released explicitly. */ diff --git a/qpid/cpp/src/qpid/linearstore/DataTokenImpl.h b/qpid/cpp/src/qpid/linearstore/DataTokenImpl.h new file mode 100644 index 0000000000..7152cef6a0 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/DataTokenImpl.h @@ -0,0 +1,47 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_DATATOKENIMPL_H +#define QPID_LINEARSTORE_DATATOKENIMPL_H + +#include "qpid/linearstore/journal/data_tok.h" +#include "qpid/broker/PersistableMessage.h" +#include <boost/intrusive_ptr.hpp> + +namespace qpid{ +namespace linearstore{ + +class DataTokenImpl : public qpid::linearstore::journal::data_tok, public qpid::RefCounted /* A journal data_tok that also derives from RefCounted and carries an intrusive pointer to a PersistableMessage. */ +{ + private: + boost::intrusive_ptr<qpid::broker::PersistableMessage> sourceMsg; /* message attached via setSourceMessage(); default-constructed until then */ + public: + DataTokenImpl(); + virtual ~DataTokenImpl(); + + inline boost::intrusive_ptr<qpid::broker::PersistableMessage>& getSourceMessage() { return sourceMsg; } /* returns a mutable reference to the stored pointer, not a copy */ + inline void setSourceMessage(const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg) { sourceMsg = msg; } +}; + +} // namespace linearstore +} // namespace qpid + +#endif // ifndef QPID_LINEARSTORE_DATATOKENIMPL_H diff --git a/qpid/cpp/src/qpid/linearstore/ISSUES b/qpid/cpp/src/qpid/linearstore/ISSUES new file mode 100644 index 0000000000..4023ba9629 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/ISSUES @@ -0,0 +1,224 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +Linear Store issues: + +Current/pending: +================ + Q-JIRA RHBZ Description / Comments + ------ ------- ---------------------- + 5359 - Linearstore: Implement new management schema and wire into store + 5360 - Linearstore: Evaluate and rework logging to produce a consistent log output +* 5361 1145359 Linearstore: No tests for linearstore functionality currently exist + svn r.1564893 2014-02-05: Added tx-test-soak.sh + svn r.1564935 2014-02-05: Added license text to tx-test-soak.sh + svn r.1625283 2014-09-16: Basic python tests from legacystore ported over to linearstore + * No existing tests for linearstore: + ** Basic broker-level tests for txn and non-txn recovery + ** Store-level tests which check write boundary conditions + ** EFP tests, including file recovery, error management + ** Unit tests + ** Basic performance tests + 5464 - [linearstore] Incompletely created journal files accumulate in EFP + - 1088944 [Linearstore] store does not return all files to EFP after purging big queue <queue purge issue> +* - 1066256 [LinearStore] changing efp size after using store broke the new durable nodes creation + - 1067480 [LinearStore] Provide a way to limit max count/size of empty files in EFP + - 1067429 [LinearStore] last file from deleted queue is not moved to EFP <queue delete issue> + - 1067482 [LinearStore] Provide a way to preallocate empty pages in EFP +* 6303 1180660 [linearstore] Roll back auto-upgrade of store directory structure +* 5362 1145363 Linearstore: No store tools exist for examining the journals + svn r.1556888 2014-01-09: WIP checkin for linearstore version of qpid_qls_analyze. Needs testing and tidy-up. 
+ svn r.1560530 2014-01-22: Bugfixes for qpid_qls_analyze + svn r.1561848 2014-01-27: Bugfixes and enhancements for qpid_qls_analyze + svn r.1564808 2014-02-05: Bugfixes and enhancements for qpid_qls_analyze + svn r.1578899 2014-03-18: Bugfixes and enhancements for qpid_qls_analyze + svn r.1583778 2014-04-01: Bugfix for qpid_qls_analyze + * Store analysis and status + * Recovery/reading of message content + * Empty file pool status and management + + + + +Fixed/closed: +============= + Q-JIRA RHBZ Description / Comments + ------ ------- ---------------------- + 5357 1052518 Linearstore: Empty file recycling not functional + svn r.1545563 2013-11-26: Proposed fix. VERIFIED + 5358 1052727 Linearstore: Checksums not implemented in record tail + svn r.1547601 2013-12-03: Proposed fix. NEEDINFO on algorithm + 5387 1036071 Linearstore: Segmentation fault when deleting queue + svn r.1547641 2013-12-03: Proposed fix. VERIFIED + 5388 1035802 Linearstore: Segmentation fault when recovering empty queue + svn r.1547921 2013-12-04: Proposed fix. 
VERIFIED +NO-JIRA - Added missing Apache copyright/license text + svn r.1551304 2013-12-16: Proposed fix + 5425 1052445 Linearstore: Transaction Prepared List (TPL) fails with jexception 0x0402 AtomicCounter::addLimit() threw JERR_JNLF_FILEOFFSOVFL + svn r.1551361 2013-12-16: Proposed fix VERIFIED + 5442 1039949 Linearstore: Dtx recover test fails + svn r.1552772 2013-12-20: Proposed fix VERIFIED + 5444 1052775 Linearstore: Recovering from qpid-txtest fails with "Inconsistent TPL 2PC count" error message + svn r.1553148 2013-12-23: Proposed fix NEEDINFO on reproduction and testing + - 1038599 [LinearStore] Abort when deleting used queue after restart + CLOSED-NOTABUG 2014-01-06 + 5460 1051097 [linearstore] Recovery of store which contains prepared but incomplete transactions results in message loss + svn r.1556892 2014-01-09: Proposed fix VERIFIED + 5473 1051924 [linearstore] Recovery of journal in which last logical file contains truncated record causes crash + svn r.1557620 2014-01-12: Proposed fix MODIFIED + 5483 - [linearstore] Recovery of journal with partly written record fails with "JERR_JREC_BADRECTAIL: Invalid data record tail" error message + svn r.1558589 2014-01-15: Proposed fix + * May be linked to RHBZ 1039522 - VERIFIED + * May be linked to RHBZ 1039525 - VERIFIED + - 1039522 Qpid crashes while recovering from linear store around apid::linearstore::journal::JournalFile::getFqFileName() including enq_rec::decode() threw JERR_JREC_BAD_RECTAIL + * Possible dup of 1039525 + * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. VERIFIED. + - 1039525 Qpid crashes while recovering from linear store around apid::linearstore::journal::jexception::format including enq_rec::decode() threw JERR_JREC_BAD_REC_TAIL + * Possible dup of 1039522 + * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. VERIFIED. 
+ 5487 1054448 [linearstore] Replace use of /dev/urandom with c random generator calls + svn r.1558913 2014-01-16: Proposed fix VERIFIED + 5484 1035843 Slow performance for producers + svn r.1558592 2014-01-15 fixes an issue with using /dev/random as a source of random numbers for Journal serial numbers. + svn r.1558913 2014-01-16 replaces use of /dev/urandom with several calls to rand() to construct a 64-bit random number. + * Recommend rebuilding and testing for performance again with these two fixes. VERIFIED. + 5479 1053701 [linearstore] Using recovered store results in "JERR_JNLF_FILEOFFSOVFL: Attempted to increase submitted offset past file size. (JournalFile::submittedDblkCount)" error message + * Probability: 2 of 600 (0.3%) using tx-test-soak.sh + * Fixed by checkin for QPID-5480, no longer able to reproduce. VERIFIED + 5480 1053749 [linearstore] Recovery of store failure with "JERR_MAP_NOTFOUND: Key not found in map." error message + svn r.1564877 2014-02-05: Proposed fix + * Probability: 6 of 600 (1.0%) using tx-test-soak.sh + * If broker is started a second time after failure, it starts correctly and test completes ok. + * Problem: File is being recycled to EFP with still-locked enqueues in it (ie dequeued transactionally). + * Problem: Record alignment check writes filler records to wrong file when decoding bad record moves across a file boundary + 5603 1063700 [linearstore] broker restart fails under stress test + svn r.1574513 2014-03-05: Proposed fix. POST + * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL + 5607 1064181 [linearstore] Qpidd closes transactional client session&connection with async_dequeue() failed + svn r.1575009 2014-03-06 Proposed fix. 
POST + * jexception 0x010b LinearFileController::getCurrentSerial() threw JERR_NULL + - 1064230 [linearstore] Qpidd linearstore recovery sometimes fail to recover messages with recoverMessages() failed + * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL + * possible dup of 1063700 + - 1036026 [LinearStore] Qpid linear store unable to create durable queue - framing-error: Queue <q-name>: create() failed: jexception 0x0000 + * UNABLE TO REPRODUCE - but Frantizek has additional info + * Retested after checkin 1575009, problem solved. VERIFIED + 5651 - [C++ broker] segfault in qpid::linearstore::journal::jdir::clear_dir when declaring durable queue + svn r.1582730 2014-03-28 Proposed fix by Pavel Moravec + * Bug introduced by r.1578899. + 5661 - [linearstore] Set default cmake build to exclude linearstore + svn r.1584379 2014-04-03 Proposed solution. + * Run ccmake, select BUILD_LINEARSTORE to change its value to ON to build. + 5750 1078142 [linearstore] qpidd closes connection with (distributed) transactional client while checking previous transaction, broker signals error (closed by error: Queue Ve0-2: async_dequeue() failed: exception 0x0103 wmgr::get_events() threw JERR__AIO: AIO error) + svn r.1594215 2014-05-13 Proposed solution. + * jexception 0x0103 wmgr::get_events() threw JERR__AIO: AIO error. 
(AIO write operation failed: Invalid argument (-22) [pg=0 size=8192 offset=4096 fh=22]) + 5655 1078937 [linearstore] Installation and tests for new store analysis tool qpid-qls-analyze + svn r.1596633 2014-05-21: Modified to run from installed location + 5767 1098118 [linearstore] broker segfaults when recovering journal file with damaged header + svn r.1596509 2014-05-21 Proposed solution (committed by pmoravec) + svn r.1599243 2014-06-02 Solution to additional case of file header corruption + 5924 1124906 [linearstore] Qpidd Will Not Start with Large Number of Queues + svn r.1614665 2014-07-30 Proposed solution + 5948 1121660 [AMQP 1.0] Broker restart failure with durable topic using non-durable exchange + svn r.1616287 2014-08-06 Proposed solution checked in by gsim + This turned out to be an AMQP error, fix does not affect store code. + 6043 1089652 [RFE]: Configuration option for linear store to delete or overwrite the used journal files. + svn r.1620426 2014-08-25 Proposed solution + 6147 1152012 [C++ broker linearstore] missing journal id in "trace Mgmt create journal." log + svn r.1631360 2014-10-13 Proposed solution + 6157 1150397 linearstore: segfault when 2 journals request new journal file from empty EFP + svn r.1632504 2014-10-17 Proposed solution by pmoravec + 6230 1165200 [linearstore] qpid-qls-analyze fails when analyzing empty journal + svn r.1643053 2014-11-18: Proposed fix + 6248 1167911 [linearstore] Symlink creation fails if store dir path is not absolute + svn r.1641689 2014-11-25 Proposed solution + 5671 1160367 [linearstore] Add ability to use disk partitions and select per-queue EFPs + svn r.1636598 2014-11-04: WIP: New EFP and journal dir structure using symlinks + svn r.1637985 2014-11-10: WIP: Auto-upgrade from old dir structure to new + svn r.1649081 2015-01-02: WIP: Specify new queue using qpid-config --durable together with --efp-partition-num and/or --efp-pool-file-size. Needs testing. 
+ - 1148807 [linearstore] Restarting broker with empty journal raises confusing warning + Fixed by svn r.1649081 of bug 5671 / 1160367 above + + +Ordered checkin list: +===================== +In order to port the linearstore changes from trunk to a branch, the following svn checkins need to be ported in order: + +no. svn r Q-JIRA RHBZ Date Alt Committer +--- ------- ------- -------- ---------- ------------- + 1. 1545563 5357 1052518 2013-11-26 0.22-mrg + 2. 1547601 5358 1052727 2013-12-03 0.22-mrg + 3. 1547641 5387 1036071 2013-12-03 0.22-mrg + 4. 1547921 5388 1035802 2013-12-04 0.22-mrg + 5. 1551304 NO-JIRA - 2013-12-16 0.22-mrg (aconway) + 6. 1551361 5425 1052445 2013-12-16 0.22-mrg + 7. 1552772 5442 1039949 2013-12-20 0.22-mrg + 8. 1553148 5444 1052775 2013-12-23 0.22-mrg + 9. 1556888 5362 - 2014-01-09 +10. 1556892 5460 1051097 2014-01-09 0.22-mrg +11. 1557620 5473 1051924 2014-01-12 0.22-mrg +12. 1558589 5483 - 2014-01-15 0.22-mrg +13. 1558592 5484 1035843 2014-01-15 0.22-mrg +14. 1558913 5487 1054448 2014-01-16 0.22-mrg +15. 1560530 5362 - 2014-01-22 +16. 1561848 5362 - 2014-01-27 +17. 1564808 5362 - 2014-02-05 +18. 1564877 5480 1053749 2014-02-05 0.22-mrg +19. 1564893 5361 - 2014-02-05 +20. 1564935 5361 - 2014-02-05 +21. 1574513 5603 1063700 2014-03-05 0.22-mrg +22. 1575009 5607 1064181 2014-03-06 0.22-mrg +23. 1578899 5362 - 2014-03-18 parts in 0.22-mrg +24. 1582730 5651 - 2014-03-28 0.22-mrg (pmoravec) +25. 1583778 5362 - 2014-04-01 +26. 1584379 5661 - 2014-04-03 +27. 1594215 5750 1078142 2014-05-13 0.22-mrg +28. 1596509 5767 1098118 2014-05-21 0.22-mrg (pmoravec) +29. 1596633 NO-JIRA 1078937 2014-05-21 (includes tools install update) +30. 1599243 5767 1098118 2014-06-02 0.22-mrg +31. 1599243 5767 1098118 2014-06-02 +32. 1614665 5924 1124906 2014-07-30 +33. 1620426 6043 1089652 2014-08-25 +34. 1631360 6147 1152012 2014-10-13 (pmoravec) +35. 1632504 6157 1150397 2014-10-17 (pmoravec) +36. 1636598 5671 1160367 2014-11-04 +37. 
1637985 5671 1160367 2014-11-10 +38. 1643053 6230 1165200 2014-11-18 +39. 1641689 6248 1167911 2014-11-25 +40. 1649081 5671 1160367 2015-01-02 +41. 1649082 NO-JIRA - 2015-01-02 + +See above sections for details on these checkins. + +Future work: +============ +* One journal file lost when queue deleted. All files except for one are recycled back to the EFP. +* Complete exceptions - several exceptions thrown using jexception have no exception numbers +* Investigate ability of store to detect missing journal files, especially from logical end of a journal +* Investigate ability of store to handle file muddle-ups (ie journal files from EFP which are not zeroed or other journals) +* Look at improving the efficiency of recovery - right now the entire store is read once, and then each recovered record xid and data is read again + +Code tidy-up +------------ +* Remove old comments +* Use c++ cast templates instead of (xxx)y +* Member names: xxx_ +* Rename classes, functions and variables to camel-case +* Add Doxygen docs to classes +* Make fid's consistent in name (fid, file_id, pfid) and format (hex vs decimal) diff --git a/qpid/cpp/src/qpid/linearstore/IdDbt.cpp b/qpid/cpp/src/qpid/linearstore/IdDbt.cpp new file mode 100644 index 0000000000..d427085bbe --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/IdDbt.cpp @@ -0,0 +1,42 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
 You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/IdDbt.h" + +using namespace qpid::linearstore; + +IdDbt::IdDbt() : id(0) /* Starts with id 0 and binds the Dbt to the id member. */ +{ + init(); +} + +IdDbt::IdDbt(uint64_t _id) : id(_id) /* Starts with the given id and binds the Dbt to the id member. */ +{ + init(); +} + +void IdDbt::init() /* Points the Dbt data at this object's own id member, with size and ulen both sizeof(uint64_t) and the DB_DBT_USERMEM flag set. */ +{ + set_data(&id); + set_size(sizeof(uint64_t)); + set_ulen(sizeof(uint64_t)); + set_flags(DB_DBT_USERMEM); +} diff --git a/qpid/cpp/src/qpid/linearstore/IdDbt.h b/qpid/cpp/src/qpid/linearstore/IdDbt.h new file mode 100644 index 0000000000..c7264491ab --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/IdDbt.h @@ -0,0 +1,42 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_IDDBT_H +#define QPID_LINEARSTORE_IDDBT_H + +#include "db-inc.h" + +namespace qpid{ +namespace linearstore{ + +class IdDbt : public Dbt /* A Dbt whose data pointer refers to its own 64-bit id member. NOTE(review): a copied IdDbt would presumably keep pointing at the original object's id; confirm instances are never copied. */ +{ + void init(); /* shared set-up used by both constructors */ +public: + uint64_t id; /* public so callers can read or write the value the Dbt points at */ + + IdDbt(uint64_t id); + IdDbt(); +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_IDDBT_H diff --git a/qpid/cpp/src/qpid/linearstore/IdSequence.cpp b/qpid/cpp/src/qpid/linearstore/IdSequence.cpp new file mode 100644 index 0000000000..4d3172ffe9 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/IdSequence.cpp @@ -0,0 +1,40 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/IdSequence.h" + +using namespace qpid::linearstore; +using qpid::sys::Mutex; + +IdSequence::IdSequence() : id(1) {} /* The first value handed out by next() is 1. */ + +uint64_t IdSequence::next() /* Returns the current id and advances it by one while holding the mutex; 0 is never returned. */ +{ + Mutex::ScopedLock guard(lock); + if (!id) id++; // avoid 0 when folding around + return id++; +} + +void IdSequence::reset(uint64_t value) /* Sets the next id to value without taking the mutex. */ +{ + //deliberately not threadsafe, used only on recovery + id = value; +} diff --git a/qpid/cpp/src/qpid/linearstore/IdSequence.h b/qpid/cpp/src/qpid/linearstore/IdSequence.h new file mode 100644 index 0000000000..17996eec52 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/IdSequence.h @@ -0,0 +1,43 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_IDSEQUENCE_H +#define QPID_LINEARSTORE_IDSEQUENCE_H + +#include "qpid/framing/amqp_types.h" +#include "qpid/sys/Mutex.h" + +namespace qpid{ +namespace linearstore{ + +class IdSequence /* Generator of 64-bit ids: next() hands out consecutive non-zero values under a mutex, reset() overwrites the counter. */ +{ + qpid::sys::Mutex lock; /* taken by next() only */ + uint64_t id; /* the value the next call to next() will return, unless it is 0 */ +public: + IdSequence(); + uint64_t next(); + void reset(uint64_t value); /* not synchronised; the implementation comments it as recovery-only */ +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_IDSEQUENCE_H diff --git a/qpid/cpp/src/qpid/linearstore/JournalImpl.cpp b/qpid/cpp/src/qpid/linearstore/JournalImpl.cpp new file mode 100644 index 0000000000..b2d41275a0 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/JournalImpl.cpp @@ -0,0 +1,516 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/JournalImpl.h" + +#include "qpid/linearstore/DataTokenImpl.h" +#include "qpid/linearstore/JournalLogImpl.h" +#include "qpid/linearstore/journal/jexception.h" +#include "qpid/linearstore/StoreException.h" +#include "qpid/management/ManagementAgent.h" + +namespace qpid { +namespace linearstore { + +InactivityFireEvent::InactivityFireEvent(JournalImpl* p, + const ::qpid::sys::Duration timeout): + ::qpid::sys::TimerTask(timeout, "JournalInactive:"+p->id()), _parent(p) {} + +void InactivityFireEvent::fire() { + ::qpid::sys::Mutex::ScopedLock sl(_ife_lock); + if (_parent) { + _parent->flushFire(); + } +} + +GetEventsFireEvent::GetEventsFireEvent(JournalImpl* p, + const ::qpid::sys::Duration timeout): + ::qpid::sys::TimerTask(timeout, "JournalGetEvents:"+p->id()), _parent(p) +{} + +void GetEventsFireEvent::fire() { + ::qpid::sys::Mutex::ScopedLock sl(_gefe_lock); + if (_parent) { + _parent->getEventsFire(); + } +} + +JournalImpl::JournalImpl(::qpid::sys::Timer& timer_, + const std::string& journalId, + const std::string& journalDirectory, + JournalLogImpl& journalLogRef, + const ::qpid::sys::Duration getEventsTimeout, + const ::qpid::sys::Duration flushTimeout, + ::qpid::management::ManagementAgent* a, + DeleteCallback onDelete): + jcntl(journalId, journalDirectory, journalLogRef), + timer(timer_), + _journalLogRef(journalLogRef), + getEventsTimerSetFlag(false), + writeActivityFlag(false), + flushTriggeredFlag(true), + deleteCallback(onDelete) +{ + getEventsFireEventsPtr = new GetEventsFireEvent(this, getEventsTimeout); + inactivityFireEventPtr = new InactivityFireEvent(this, flushTimeout); + { + timer.start(); + timer.add(inactivityFireEventPtr); + } + + initManagement(a); + + QLS_LOG2(info, _jid, "Created"); + std::ostringstream oss; + oss << "Journal directory = \"" << journalDirectory << "\""; + QLS_LOG2(debug, _jid, oss.str()); +} + +JournalImpl::~JournalImpl() +{ + if (deleteCallback) deleteCallback(*this); + if (_init_flag && 
!_stop_flag){ + try { stop(true); } // NOTE: This will *block* until all outstanding disk aio calls are complete! + catch (const ::qpid::linearstore::journal::jexception& e) { QLS_LOG2(error, _jid, e.what()); } + } + getEventsFireEventsPtr->cancel(); + inactivityFireEventPtr->cancel(); + + if (_mgmtObject.get() != 0) { + _mgmtObject->resourceDestroy(); + _mgmtObject.reset(); + } + + QLS_LOG2(info, _jid, "Destroyed"); +} + +void +JournalImpl::initManagement(::qpid::management::ManagementAgent* a) +{ + _agent = a; + if (_agent != 0) + { + _mgmtObject = ::qmf::org::apache::qpid::linearstore::Journal::shared_ptr ( + new ::qmf::org::apache::qpid::linearstore::Journal(_agent, this, _jid)); + + _mgmtObject->set_directory(_jdir.dirname()); +// _mgmtObject->set_baseFileName(_base_filename); +// _mgmtObject->set_readPageSize(JRNL_RMGR_PAGE_SIZE * JRNL_SBLK_SIZE); +// _mgmtObject->set_readPages(JRNL_RMGR_PAGES); + + // The following will be set on initialize(), but being properties, these must be set to 0 in the meantime + //_mgmtObject->set_initialFileCount(0); + //_mgmtObject->set_dataFileSize(0); + //_mgmtObject->set_currentFileCount(0); + _mgmtObject->set_writePageSize(0); + _mgmtObject->set_writePages(0); + + _agent->addObject(_mgmtObject, 0, true); + } +} + + +void +JournalImpl::initialize(::qpid::linearstore::journal::EmptyFilePool* efpp_, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + ::qpid::linearstore::journal::aio_callback* const cbp) +{ +// efpp->createJournal(_jdir); +// QLS_LOG2(info, _jid, "Initialized"); +// std::ostringstream oss; +//// oss << "Initialize; num_jfiles=" << num_jfiles << " jfsize_sblks=" << jfsize_sblks; +// oss << "Initialize; efpPartitionNumber=" << efpp_->getPartitionNumber(); +// oss << " efpFileSizeKb=" << efpp_->fileSizeKib(); +// oss << " wcache_pgsize_sblks=" << wcache_pgsize_sblks; +// oss << " wcache_num_pages=" << wcache_num_pages; +// QLS_LOG2(debug, _jid, oss.str()); + jcntl::initialize(efpp_, 
wcache_num_pages, wcache_pgsize_sblks, cbp); +// QLS_LOG2(debug, _jid, "Initialization complete"); + // TODO: replace for linearstore: _lpmgr +/* + if (_mgmtObject.get() != 0) + { + _mgmtObject->set_initialFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_autoExpand(_lpmgr.is_ae()); + _mgmtObject->set_currentFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_maxFileCount(_lpmgr.ae_max_jfiles()); + _mgmtObject->set_dataFileSize(_jfsize_sblks * JRNL_SBLK_SIZE); + _mgmtObject->set_writePageSize(wcache_pgsize_sblks * JRNL_SBLK_SIZE); + _mgmtObject->set_writePages(wcache_num_pages); + } + if (_agent != 0) + _agent->raiseEvent::(qmf::org::apache::qpid::linearstore::EventCreated(_jid, _jfsize_sblks * JRNL_SBLK_SIZE, _lpmgr.num_jfiles()), + qpid::management::ManagementAgent::SEV_NOTE); +*/ +} + +void +JournalImpl::recover(boost::shared_ptr< ::qpid::linearstore::journal::EmptyFilePoolManager> efpm, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + ::qpid::linearstore::journal::aio_callback* const cbp, + boost::ptr_list<PreparedTransaction>* prep_tx_list_ptr, + uint64_t& highest_rid, + uint64_t queue_id) +{ + std::ostringstream oss1; + oss1 << "Recover;"; + oss1 << " queue_id = 0x" << std::hex << queue_id << std::dec; + oss1 << " wcache_pgsize_sblks=" << wcache_pgsize_sblks; + oss1 << " wcache_num_pages=" << wcache_num_pages; + QLS_LOG2(debug, _jid, oss1.str()); + // TODO: replace for linearstore: _lpmgr +/* + if (_mgmtObject.get() != 0) + { + _mgmtObject->set_initialFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_autoExpand(_lpmgr.is_ae()); + _mgmtObject->set_currentFileCount(_lpmgr.num_jfiles()); + _mgmtObject->set_maxFileCount(_lpmgr.ae_max_jfiles()); + _mgmtObject->set_dataFileSize(_jfsize_sblks * JRNL_SBLK_SIZE); + _mgmtObject->set_writePageSize(wcache_pgsize_sblks * JRNL_SBLK_SIZE); + _mgmtObject->set_writePages(wcache_num_pages); + } +*/ + + // TODO: This is ugly, find a way for RecoveryManager to use boost::ptr_list<PreparedTransaction>* 
directly + if (prep_tx_list_ptr) { + // Create list of prepared xids + std::vector<std::string> prep_xid_list; + for (PreparedTransaction::list::iterator i = prep_tx_list_ptr->begin(); i != prep_tx_list_ptr->end(); i++) { + prep_xid_list.push_back(i->xid); + } + + jcntl::recover(efpm.get(), wcache_num_pages, wcache_pgsize_sblks, cbp, &prep_xid_list, highest_rid); + } else { + jcntl::recover(efpm.get(), wcache_num_pages, wcache_pgsize_sblks, cbp, 0, highest_rid); + } + + // Populate PreparedTransaction lists from _tmap + if (prep_tx_list_ptr) + { + for (PreparedTransaction::list::iterator i = prep_tx_list_ptr->begin(); i != prep_tx_list_ptr->end(); i++) { + ::qpid::linearstore::journal::txn_data_list_t tdl = _tmap.get_tdata_list(i->xid); // tdl will be empty if xid not found + for (::qpid::linearstore::journal::tdl_itr_t tdl_itr = tdl.begin(); tdl_itr < tdl.end(); tdl_itr++) { + if (tdl_itr->enq_flag_) { // enqueue op + i->enqueues->add(queue_id, tdl_itr->rid_); + } else { // dequeue op + i->dequeues->add(queue_id, tdl_itr->drid_); + } + } + } + } + std::ostringstream oss2; + oss2 << "Recover phase 1 complete; highest rid found = 0x" << std::hex << highest_rid; + oss2 << std::dec << "; emap.size=" << _emap.size() << "; tmap.size=" << _tmap.size(); + oss2 << "; journal now read-only."; + QLS_LOG2(debug, _jid, oss2.str()); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_recordDepth(_emap.size()); + _mgmtObject->inc_enqueues(_emap.size()); + _mgmtObject->inc_txn(_tmap.size()); + _mgmtObject->inc_txnEnqueues(_tmap.enq_cnt()); + _mgmtObject->inc_txnDequeues(_tmap.deq_cnt()); + } +} + +void +JournalImpl::recover_complete() +{ + jcntl::recover_complete(); + QLS_LOG2(debug, _jid, "Recover phase 2 complete; journal now writable."); + // TODO: replace for linearstore: _lpmgr +/* + if (_agent != 0) + _agent->raiseEvent(qmf::org::apache::qpid::linearstore::EventRecovered(_jid, _jfsize_sblks * JRNL_SBLK_SIZE, _lpmgr.num_jfiles(), + _emap.size(), _tmap.size(), 
_tmap.enq_cnt(), _tmap.deq_cnt()), qpid::management::ManagementAgent::SEV_NOTE); +*/ +} + + +void +JournalImpl::enqueue_data_record(const void* const data_buff, + const size_t tot_data_len, + const size_t this_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const bool transient) +{ + handleIoResult(jcntl::enqueue_data_record(data_buff, tot_data_len, this_data_len, dtokp, transient)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_extern_data_record(const size_t tot_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const bool transient) +{ + handleIoResult(jcntl::enqueue_extern_data_record(tot_data_len, dtokp, transient)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_txn_data_record(const void* const data_buff, + const size_t tot_data_len, + const size_t this_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient) +{ + bool txn_incr = _mgmtObject.get() != 0 ? _tmap.in_map(xid) : false; + + handleIoResult(jcntl::enqueue_txn_data_record(data_buff, tot_data_len, this_data_len, dtokp, xid, tpc_flag, transient)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... + _mgmtObject->inc_txn(); + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_txnEnqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::enqueue_extern_txn_data_record(const size_t tot_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient) +{ + bool txn_incr = _mgmtObject.get() != 0 ? 
_tmap.in_map(xid) : false; + + handleIoResult(jcntl::enqueue_extern_txn_data_record(tot_data_len, dtokp, xid, tpc_flag, transient)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... + _mgmtObject->inc_txn(); + _mgmtObject->inc_enqueues(); + _mgmtObject->inc_txnEnqueues(); + _mgmtObject->inc_recordDepth(); + } +} + +void +JournalImpl::dequeue_data_record(::qpid::linearstore::journal::data_tok* const dtokp, + const bool txn_coml_commit) +{ + handleIoResult(jcntl::dequeue_data_record(dtokp, txn_coml_commit)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->inc_dequeues(); + _mgmtObject->inc_txnDequeues(); + _mgmtObject->dec_recordDepth(); + } +} + +void +JournalImpl::dequeue_txn_data_record(::qpid::linearstore::journal::data_tok* const dtokp, + const std::string& xid, + const bool tpc_flag, + const bool txn_coml_commit) +{ + bool txn_incr = _mgmtObject.get() != 0 ? _tmap.in_map(xid) : false; + + handleIoResult(jcntl::dequeue_txn_data_record(dtokp, xid, tpc_flag, txn_coml_commit)); + + if (_mgmtObject.get() != 0) + { + if (!txn_incr) // If this xid was not in _tmap, it will be now... 
+ _mgmtObject->inc_txn(); + _mgmtObject->inc_dequeues(); + _mgmtObject->inc_txnDequeues(); + _mgmtObject->dec_recordDepth(); + } +} + +void +JournalImpl::txn_abort(::qpid::linearstore::journal::data_tok* const dtokp, + const std::string& xid) +{ + handleIoResult(jcntl::txn_abort(dtokp, xid)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->dec_txn(); + _mgmtObject->inc_txnAborts(); + } +} + +void +JournalImpl::txn_commit(::qpid::linearstore::journal::data_tok* const dtokp, + const std::string& xid) +{ + handleIoResult(jcntl::txn_commit(dtokp, xid)); + + if (_mgmtObject.get() != 0) + { + _mgmtObject->dec_txn(); + _mgmtObject->inc_txnCommits(); + } +} + +void +JournalImpl::stop(bool block_till_aio_cmpl) +{ + InactivityFireEvent* ifep = dynamic_cast<InactivityFireEvent*>(inactivityFireEventPtr.get()); + assert(ifep); // dynamic_cast can return null if the cast fails + ifep->cancel(); + jcntl::stop(block_till_aio_cmpl); + + if (_mgmtObject.get() != 0) { + _mgmtObject->resourceDestroy(); + _mgmtObject.reset(); + } +} + +::qpid::linearstore::journal::iores +JournalImpl::flush(const bool block_till_aio_cmpl) +{ + const ::qpid::linearstore::journal::iores res = jcntl::flush(block_till_aio_cmpl); + { + ::qpid::sys::Mutex::ScopedLock sl(_getf_lock); + if (_wmgr.get_aio_evt_rem() && !getEventsTimerSetFlag) { setGetEventTimer(); } + } + return res; +} + +void +JournalImpl::getEventsFire() +{ + ::qpid::sys::Mutex::ScopedLock sl(_getf_lock); + getEventsTimerSetFlag = false; + if (_wmgr.get_aio_evt_rem()) { jcntl::get_wr_events(0); } + if (_wmgr.get_aio_evt_rem()) { setGetEventTimer(); } +} + +void +JournalImpl::flushFire() +{ + if (writeActivityFlag) { + writeActivityFlag = false; + flushTriggeredFlag = false; + } else { + if (!flushTriggeredFlag) { + flush(false); + flushTriggeredFlag = true; + } + } + inactivityFireEventPtr->setupNextFire(); + { + timer.add(inactivityFireEventPtr); + } +} + +void +JournalImpl::wr_aio_cb(std::vector< ::qpid::linearstore::journal::data_tok*>& 
dtokl) +{ + for (std::vector< ::qpid::linearstore::journal::data_tok*>::const_iterator i=dtokl.begin(); i!=dtokl.end(); i++) + { + DataTokenImpl* dtokp = static_cast<DataTokenImpl*>(*i); + if (/*!is_stopped() &&*/ dtokp->getSourceMessage()) + { + switch (dtokp->wstate()) + { + case ::qpid::linearstore::journal::data_tok::ENQ: +//std::cout << "<<<>>> JournalImpl::wr_aio_cb() ENQ dtokp rid=0x" << std::hex << dtokp->rid() << std::dec << std::endl << std::flush; // DEBUG + dtokp->getSourceMessage()->enqueueComplete(); + break; + case ::qpid::linearstore::journal::data_tok::DEQ: +//std::cout << "<<<>>> JournalImpl::wr_aio_cb() DEQ dtokp rid=0x" << std::hex << dtokp->rid() << std::dec << std::endl << std::flush; // DEBUG +/* Don't need to signal until we have a way to ack completion of dequeue in AMQP + dtokp->getSourceMessage()->dequeueComplete(); + if ( dtokp->getSourceMessage()->isDequeueComplete() ) // clear id after last dequeue + dtokp->getSourceMessage()->setPersistenceId(0); +*/ + break; + default: ; + } + } + dtokp->release(); + } +} + +void +JournalImpl::rd_aio_cb(std::vector<uint16_t>& /*pil*/) +{} + +void +JournalImpl::createStore() { + +} + +void +JournalImpl::handleIoResult(const ::qpid::linearstore::journal::iores r) +{ + writeActivityFlag = true; + switch (r) + { + case ::qpid::linearstore::journal::RHM_IORES_SUCCESS: + return; + default: + { + std::ostringstream oss; + oss << "Unexpected I/O response (" << ::qpid::linearstore::journal::iores_str(r) << ")."; + QLS_LOG2(error, _jid, oss.str()); + THROW_STORE_FULL_EXCEPTION(oss.str()); + } + } +} + +::qpid::management::Manageable::status_t JournalImpl::ManagementMethod (uint32_t /*methodId*/, + ::qpid::management::Args& /*args*/, + std::string& /*text*/) +{ + Manageable::status_t status = Manageable::STATUS_UNKNOWN_METHOD; + +/* + switch (methodId) + { + case _qmf::Journal::METHOD_EXPAND : + //_qmf::ArgsJournalExpand& eArgs = (_qmf::ArgsJournalExpand&) args; + + // Implement "expand" using eArgs.i_by 
(expand-by argument) + + status = Manageable::STATUS_NOT_IMPLEMENTED; + break; + } +*/ + + return status; +} + +}} diff --git a/qpid/cpp/src/qpid/linearstore/JournalImpl.h b/qpid/cpp/src/qpid/linearstore/JournalImpl.h new file mode 100644 index 0000000000..667579253e --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/JournalImpl.h @@ -0,0 +1,252 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNALIMPL_H +#define QPID_LINEARSTORE_JOURNALIMPL_H + +#include <boost/ptr_container/ptr_list.hpp> +#include "qpid/broker/PersistableQueue.h" +#include "qpid/linearstore/journal/aio_callback.h" +#include "qpid/linearstore/journal/jcntl.h" +#include "qpid/linearstore/PreparedTransaction.h" +#include "qpid/sys/Timer.h" + +#include "qmf/org/apache/qpid/linearstore/Journal.h" + +namespace qpid{ + +namespace sys { +//class Timer; +} + +namespace linearstore{ +namespace journal { +// class EmptyFilePool; +} +class JournalImpl; +class JournalLogImpl; + +class InactivityFireEvent : public ::qpid::sys::TimerTask +{ + JournalImpl* _parent; + ::qpid::sys::Mutex _ife_lock; + + public: + InactivityFireEvent(JournalImpl* p, + const ::qpid::sys::Duration timeout); + virtual ~InactivityFireEvent() {} + void fire(); + inline void cancel() { ::qpid::sys::Mutex::ScopedLock sl(_ife_lock); _parent = 0; } +}; + +class GetEventsFireEvent : public ::qpid::sys::TimerTask +{ + JournalImpl* _parent; + ::qpid::sys::Mutex _gefe_lock; + + public: + GetEventsFireEvent(JournalImpl* p, + const ::qpid::sys::Duration timeout); + virtual ~GetEventsFireEvent() {} + void fire(); + inline void cancel() { ::qpid::sys::Mutex::ScopedLock sl(_gefe_lock); _parent = 0; } +}; + +class JournalImpl : public ::qpid::broker::ExternalQueueStore, + public ::qpid::linearstore::journal::jcntl, + public ::qpid::linearstore::journal::aio_callback +{ + public: + typedef boost::function<void (JournalImpl&)> DeleteCallback; + + protected: + ::qpid::sys::Timer& timer; + JournalLogImpl& _journalLogRef; + bool getEventsTimerSetFlag; + boost::intrusive_ptr< ::qpid::sys::TimerTask> getEventsFireEventsPtr; + ::qpid::sys::Mutex _getf_lock; + ::qpid::sys::Mutex _read_lock; + + bool writeActivityFlag; + bool flushTriggeredFlag; + boost::intrusive_ptr< ::qpid::sys::TimerTask> inactivityFireEventPtr; + + ::qpid::management::ManagementAgent* _agent; + 
::qmf::org::apache::qpid::linearstore::Journal::shared_ptr _mgmtObject; + DeleteCallback deleteCallback; + + public: + + JournalImpl(::qpid::sys::Timer& timer, + const std::string& journalId, + const std::string& journalDirectory, + JournalLogImpl& journalLogRef, + const ::qpid::sys::Duration getEventsTimeout, + const ::qpid::sys::Duration flushTimeout, + ::qpid::management::ManagementAgent* agent, + DeleteCallback deleteCallback=DeleteCallback() ); + + virtual ~JournalImpl(); + + void initManagement(::qpid::management::ManagementAgent* agent); + + void initialize(::qpid::linearstore::journal::EmptyFilePool* efp, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + ::qpid::linearstore::journal::aio_callback* const cbp); + + inline void initialize(::qpid::linearstore::journal::EmptyFilePool* efpp, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks) { + initialize(efpp, wcache_num_pages, wcache_pgsize_sblks, this); + } + + void recover(boost::shared_ptr< ::qpid::linearstore::journal::EmptyFilePoolManager> efpm, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + ::qpid::linearstore::journal::aio_callback* const cbp, + boost::ptr_list<PreparedTransaction>* prep_tx_list_ptr, + uint64_t& highest_rid, + uint64_t queue_id); + + inline void recover(boost::shared_ptr< ::qpid::linearstore::journal::EmptyFilePoolManager> efpm, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + boost::ptr_list<PreparedTransaction>* prep_tx_list_ptr, + uint64_t& highest_rid, + uint64_t queue_id) { + recover(efpm, wcache_num_pages, wcache_pgsize_sblks, this, prep_tx_list_ptr, highest_rid, queue_id); + } + + void recover_complete(); + + // Overrides for write inactivity timer + void enqueue_data_record(const void* const data_buff, + const size_t tot_data_len, + const size_t this_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const bool transient); + + void enqueue_extern_data_record(const 
size_t tot_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const bool transient); + + void enqueue_txn_data_record(const void* const data_buff, + const size_t tot_data_len, + const size_t this_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient); + + void enqueue_extern_txn_data_record(const size_t tot_data_len, + ::qpid::linearstore::journal::data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient); + + void dequeue_data_record(::qpid::linearstore::journal::data_tok* + const dtokp, + const bool txn_coml_commit); + + void dequeue_txn_data_record(::qpid::linearstore::journal::data_tok* const dtokp, + const std::string& xid, + const bool tpc_flag, + const bool txn_coml_commit); + + void txn_abort(::qpid::linearstore::journal::data_tok* const dtokp, const std::string& xid); + + void txn_commit(::qpid::linearstore::journal::data_tok* const dtokp, const std::string& xid); + + void stop(bool block_till_aio_cmpl = false); + + // Overrides for get_events timer + ::qpid::linearstore::journal::iores flush(const bool block_till_aio_cmpl); + + // TimerTask callback + void getEventsFire(); + void flushFire(); + + // AIO callbacks + virtual void wr_aio_cb(std::vector< ::qpid::linearstore::journal::data_tok*>& dtokl); + virtual void rd_aio_cb(std::vector<uint16_t>& pil); + + ::qpid::management::ManagementObject::shared_ptr GetManagementObject (void) const + { return _mgmtObject; } + + ::qpid::management::Manageable::status_t ManagementMethod(uint32_t, + ::qpid::management::Args&, + std::string&); + + void resetDeleteCallback() { deleteCallback = DeleteCallback(); } + + protected: + void createStore(); + + inline void setGetEventTimer() + { + getEventsFireEventsPtr->setupNextFire(); + timer.add(getEventsFireEventsPtr); + getEventsTimerSetFlag = true; + } + void handleIoResult(const ::qpid::linearstore::journal::iores r); + + // Management instrumentation 
callbacks overridden from jcntl + inline void instr_incr_outstanding_aio_cnt() { + if (_mgmtObject.get() != 0) _mgmtObject->inc_outstandingAIOs(); + } + inline void instr_decr_outstanding_aio_cnt() { + if (_mgmtObject.get() != 0) _mgmtObject->dec_outstandingAIOs(); + } + +}; // class JournalImpl + +class TplJournalImpl : public JournalImpl +{ + public: + TplJournalImpl(::qpid::sys::Timer& timer, + const std::string& journalId, + const std::string& journalDirectory, + JournalLogImpl& journalLogRef, + const ::qpid::sys::Duration getEventsTimeout, + const ::qpid::sys::Duration flushTimeout, + ::qpid::management::ManagementAgent* agent) : + JournalImpl(timer, journalId, journalDirectory, journalLogRef, getEventsTimeout, flushTimeout, agent) + {} + + virtual ~TplJournalImpl() {} + + // Special version of read_data_record that ignores transactions - needed when reading the TPL + inline ::qpid::linearstore::journal::iores read_data_record(void** const datapp, + std::size_t& dsize, + void** const xidpp, + std::size_t& xidsize, + bool& transient, + bool& external, + ::qpid::linearstore::journal::data_tok* const dtokp) { + return JournalImpl::read_data_record(datapp, dsize, xidpp, xidsize, transient, external, dtokp, false); + } +}; // class TplJournalImpl + +} // namespace msgstore +} // namespace mrg + +#endif // ifndef QPID_LINEARSTORE_JOURNALIMPL_H diff --git a/qpid/cpp/src/qpid/linearstore/JournalLogImpl.cpp b/qpid/cpp/src/qpid/linearstore/JournalLogImpl.cpp new file mode 100644 index 0000000000..c3e631a6ca --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/JournalLogImpl.cpp @@ -0,0 +1,61 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/JournalLogImpl.h" + +#include "qpid/log/Statement.h" + +namespace qpid { +namespace linearstore { + +JournalLogImpl::JournalLogImpl(const qpid::linearstore::journal::JournalLog::log_level_t logLevelThreshold) : qpid::linearstore::journal::JournalLog(logLevelThreshold) {} +JournalLogImpl::~JournalLogImpl() {} + +void +JournalLogImpl::log(const qpid::linearstore::journal::JournalLog::log_level_t level, + const std::string& log_stmt) const { + switch (level) { + case LOG_CRITICAL: QPID_LOG(critical, "Linear Store: " << log_stmt); break; + case LOG_ERROR: QPID_LOG(error, "Linear Store: " << log_stmt); break; + case LOG_WARN: QPID_LOG(warning, "Linear Store: " << log_stmt); break; + case LOG_NOTICE: QPID_LOG(notice, "Linear Store: " << log_stmt); break; + case LOG_INFO: QPID_LOG(info, "Linear Store: " << log_stmt); break; + case LOG_DEBUG: QPID_LOG(debug, "Linear Store: " << log_stmt); break; + default: QPID_LOG(trace, "Linear Store: " << log_stmt); + } +} + +void +JournalLogImpl::log(const qpid::linearstore::journal::JournalLog::log_level_t level, + const std::string& jid, + const std::string& log_stmt) const { + switch (level) { + case LOG_CRITICAL: QPID_LOG(critical, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + case LOG_ERROR: QPID_LOG(error, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + case LOG_WARN: 
QPID_LOG(warning, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + case LOG_NOTICE: QPID_LOG(notice, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + case LOG_INFO: QPID_LOG(info, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + case LOG_DEBUG: QPID_LOG(debug, "Linear Store: Journal \"" << jid << "\": " << log_stmt); break; + default: QPID_LOG(trace, "Linear Store: Journal \"" << jid << "\": " << log_stmt); + } +} + +}} // namespace qpid::linearstore diff --git a/qpid/cpp/src/qpid/linearstore/JournalLogImpl.h b/qpid/cpp/src/qpid/linearstore/JournalLogImpl.h new file mode 100644 index 0000000000..846eaac124 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/JournalLogImpl.h @@ -0,0 +1,47 @@ + /* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_LOG_H +#define QPID_LINEARSTORE_LOG_H + +#include "qpid/linearstore/journal/JournalLog.h" + +#define QLS_LOG(level, msg) QPID_LOG(level, "Linear Store: " << msg) +#define QLS_LOG2(level, queue, msg) QPID_LOG(level, "Linear Store: Journal \"" << queue << "\":" << msg) + +namespace qpid { +namespace linearstore { + +class JournalLogImpl : public qpid::linearstore::journal::JournalLog +{ +public: + JournalLogImpl(const qpid::linearstore::journal::JournalLog::log_level_t logLevelThreshold); + virtual ~JournalLogImpl(); + virtual void log(const qpid::linearstore::journal::JournalLog::log_level_t logLevel, + const std::string& logStatement) const; + virtual void log(const qpid::linearstore::journal::JournalLog::log_level_t logLevel, + const std::string& journalId, + const std::string& logStatement) const; +}; + +}} // namespace qpid::linearstore + +#endif // QPID_LINEARSTORE_LOG_H diff --git a/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.cpp b/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.cpp new file mode 100644 index 0000000000..70eac27f48 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.cpp @@ -0,0 +1,1559 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/MessageStoreImpl.h" + +#include "qpid/broker/Broker.h" +#include "qpid/framing/FieldValue.h" +#include "qpid/linearstore/BindingDbt.h" +#include "qpid/linearstore/BufferValue.h" +#include "qpid/linearstore/Cursor.h" +#include "qpid/linearstore/DataTokenImpl.h" +#include "qpid/linearstore/IdDbt.h" +#include "qpid/linearstore/JournalImpl.h" +#include "qpid/linearstore/journal/EmptyFilePoolManager.h" +#include "qpid/linearstore/StoreException.h" +#include "qpid/linearstore/TxnCtxt.h" +#include "qpid/log/Statement.h" + +#include "qmf/org/apache/qpid/linearstore/Package.h" + +#define MAX_AIO_SLEEPS 100000 // tot: ~1 sec +#define AIO_SLEEP_TIME_US 10 // 0.01 ms + +//namespace _qmf = qmf::org::apache::qpid::linearstore; + +namespace qpid{ +namespace linearstore{ + +const std::string MessageStoreImpl::storeTopLevelDir("qls"); // Sets the top-level store dir name + +// FIXME aconway 2010-03-09: was 10 +qpid::sys::Duration MessageStoreImpl::defJournalGetEventsTimeout(1 * qpid::sys::TIME_MSEC); // 10ms +qpid::sys::Duration MessageStoreImpl::defJournalFlushTimeout(500 * qpid::sys::TIME_MSEC); // 0.5s +qpid::sys::Mutex TxnCtxt::globalSerialiser; + +MessageStoreImpl::MessageStoreImpl(qpid::broker::Broker* broker_, const char* envpath_) : + defaultEfpPartitionNumber(0), + defaultEfpFileSize_kib(0), + overwriteBeforeReturnFlag(false), + wCachePgSizeSblks(0), + wCacheNumPages(0), + tplWCachePgSizeSblks(0), + tplWCacheNumPages(0), + highestRid(0), + isInit(false), + envPath(envpath_), + broker(broker_), + jrnlLog(qpid::linearstore::journal::JournalLog::LOG_NOTICE), + mgmtObject(), + agent(0) +{ + // Test of values for QLS_RAND_SHIFT1, QLS_RAND_SHIFT2 and QLS_RAND_MASK + if((((uint64_t)RAND_MAX << QLS_RAND_SHIFT1) ^ ((uint64_t)RAND_MAX << QLS_RAND_SHIFT2) ^ (RAND_MAX & QLS_RAND_MASK)) != 0xffffffffffffffffULL) { + 
THROW_STORE_EXCEPTION("[linearstore] 64-bit random number generation alignment error"); + } + ::srand(::time(NULL)); +} + +uint32_t MessageStoreImpl::chkJrnlWrPageCacheSize(const uint32_t param_, const std::string& paramName_) +{ + uint32_t p = param_; + + if (p == 0) { + // For zero value, use default + p = QLS_WMGR_DEF_PAGE_SIZE_KIB; + QLS_LOG(warning, "parameter " << paramName_ << " (" << param_ << ") must be a power of 2 between 1 and 128; changing this parameter to default value (" << p << ")"); + } else if ( p > 128 || (p & (p-1)) ) { + // For any positive value that is not a power of 2, use closest value + if (p < 6) p = 4; + else if (p < 12) p = 8; + else if (p < 24) p = 16; + else if (p < 48) p = 32; + else if (p < 96) p = 64; + else p = 128; + QLS_LOG(warning, "parameter " << paramName_ << " (" << param_ << ") must be a power of 2 between 1 and 128; changing this parameter to closest allowable value (" << p << ")"); + } + return p; +} + +uint16_t MessageStoreImpl::getJrnlWrNumPages(const uint32_t wrPageSizeKib_) +{ + uint32_t wrPageSizeSblks = wrPageSizeKib_ / QLS_SBLK_SIZE_KIB; // convert from KiB to number sblks + uint32_t defTotWCacheSizeSblks = QLS_WMGR_DEF_PAGE_SIZE_SBLKS * QLS_WMGR_DEF_PAGES; + switch (wrPageSizeKib_) + { + case 1: + case 2: + case 4: + // 256 KiB total cache + return defTotWCacheSizeSblks / wrPageSizeSblks / 4; + case 8: + case 16: + // 512 KiB total cache + return defTotWCacheSizeSblks / wrPageSizeSblks / 2; + default: // 32, 64, 128 + // 1 MiB total cache + return defTotWCacheSizeSblks / wrPageSizeSblks; + } +} + +qpid::linearstore::journal::efpPartitionNumber_t MessageStoreImpl::chkEfpPartition(const qpid::linearstore::journal::efpPartitionNumber_t partition_, + const std::string& /*paramName_*/) { + // TODO: check against list of existing partitions, throw if not found + return partition_; +} + +qpid::linearstore::journal::efpDataSize_kib_t MessageStoreImpl::chkEfpFileSizeKiB(const qpid::linearstore::journal::efpDataSize_kib_t 
efpFileSizeKib_, + const std::string& paramName_) { + // Rounds the requested EFP file size (KiB) to the nearest multiple of QLS_SBLK_SIZE_KIB and returns it; + // a warning naming paramName_ is logged when the value had to be changed. + // The remainder is kept at full width: a uint8_t would truncate it for sblk sizes of 256 KiB or more. + const uint64_t sblkSizeKib = uint64_t(QLS_SBLK_SIZE_KIB); + const uint64_t rem = efpFileSizeKib_ % sblkSizeKib; + if (rem != 0) { + uint64_t newVal = efpFileSizeKib_ - rem; + if (rem >= (sblkSizeKib / 2)) + newVal += sblkSizeKib; + if (newVal == 0) // never round a non-zero request down to an empty file + newVal = sblkSizeKib; + QLS_LOG(warning, "Parameter " << paramName_ << " (" << efpFileSizeKib_ << ") must be a multiple of " << + QLS_SBLK_SIZE_KIB << "; changing this parameter to the closest allowable value (" << + newVal << ")"); + return newVal; + } + return efpFileSizeKib_; + + // TODO: check against list of existing pools in the given partition +} + +void MessageStoreImpl::initManagement () +{ + if (broker != 0) { + agent = broker->getManagementAgent(); + if (agent != 0) { + qmf::org::apache::qpid::linearstore::Package packageInitializer(agent); + mgmtObject = qmf::org::apache::qpid::linearstore::Store::shared_ptr ( + new qmf::org::apache::qpid::linearstore::Store(agent, this, broker)); + + mgmtObject->set_storeDir(storeDir); + mgmtObject->set_tplIsInitialized(false); + mgmtObject->set_tplDirectory(getTplBaseDir()); + mgmtObject->set_tplWritePageSize(tplWCachePgSizeSblks * QLS_SBLK_SIZE_BYTES); + mgmtObject->set_tplWritePages(tplWCacheNumPages); + + agent->addObject(mgmtObject, 0, true); + + // Initialize all existing queues (ie those recovered before management was initialized) + for (JournalListMapItr i=journalList.begin(); i!=journalList.end(); i++) { + i->second->initManagement(agent); + } + } + } +} + +bool MessageStoreImpl::init(const qpid::Options* options_) +{ + // Extract and check options + const StoreOptions* opts = static_cast<const StoreOptions*>(options_); + qpid::linearstore::journal::efpPartitionNumber_t efpPartition = chkEfpPartition(opts->efpPartition, "efp-partition"); + qpid::linearstore::journal::efpDataSize_kib_t efpFilePoolSize_kib = chkEfpFileSizeKiB(opts->efpFileSizeKib, "efp-file-size"); + uint32_t jrnlWrCachePageSizeKib = chkJrnlWrPageCacheSize(opts->wCachePageSizeKib, "wcache-page-size"); + uint32_t 
tplJrnlWrCachePageSizeKib = chkJrnlWrPageCacheSize(opts->tplWCachePageSizeKib, "tpl-wcache-page-size"); + + // Pass option values to init() + return init(opts->storeDir, efpPartition, efpFilePoolSize_kib, opts->truncateFlag, jrnlWrCachePageSizeKib, + tplJrnlWrCachePageSizeKib, opts->overwriteBeforeReturnFlag); +} + +// These params, taken from options, are assumed to be correct and verified +bool MessageStoreImpl::init(const std::string& storeDir_, + qpid::linearstore::journal::efpPartitionNumber_t efpPartition_, + qpid::linearstore::journal::efpDataSize_kib_t efpFileSize_kib_, + const bool truncateFlag_, + uint32_t wCachePageSizeKib_, + uint32_t tplWCachePageSizeKib_, + const bool overwriteBeforeReturnFlag_) +{ + if (isInit) return true; + + // Set geometry members (converting to correct units where req'd) + overwriteBeforeReturnFlag = overwriteBeforeReturnFlag_; + defaultEfpPartitionNumber = efpPartition_; + defaultEfpFileSize_kib = efpFileSize_kib_; + wCachePgSizeSblks = wCachePageSizeKib_ / QLS_SBLK_SIZE_KIB; // convert from KiB to number sblks + wCacheNumPages = getJrnlWrNumPages(wCachePageSizeKib_); + tplWCachePgSizeSblks = tplWCachePageSizeKib_ / QLS_SBLK_SIZE_KIB; // convert from KiB to number sblks + tplWCacheNumPages = getJrnlWrNumPages(tplWCachePageSizeKib_); + if (storeDir_.size()>0) storeDir = storeDir_; + + if (truncateFlag_) + truncateInit(); + init(truncateFlag_); + + QLS_LOG(notice, "Store module initialized; store-dir=" << storeDir_); + QLS_LOG(info, "> Default EFP partition: " << defaultEfpPartitionNumber); + QLS_LOG(info, "> Default EFP file size: " << defaultEfpFileSize_kib << " (KiB)"); + QLS_LOG(info, "> Default write cache page size: " << wCachePageSizeKib_ << " (KiB)"); + QLS_LOG(info, "> Default number of write cache pages: " << wCacheNumPages); + QLS_LOG(info, "> TPL write cache page size: " << tplWCachePageSizeKib_ << " (KiB)"); + QLS_LOG(info, "> TPL number of write cache pages: " << tplWCacheNumPages); + QLS_LOG(info, "> EFP partition: 
" << defaultEfpPartitionNumber); + QLS_LOG(info, "> EFP file size pool: " << defaultEfpFileSize_kib << " (KiB)"); + QLS_LOG(info, "> Overwrite before return to EFP: " << (overwriteBeforeReturnFlag?"True":"False")); + + return isInit; +} + +void MessageStoreImpl::init(const bool truncateFlag) +{ + const int retryMax = 3; + int bdbRetryCnt = 0; + do { + if (bdbRetryCnt++ > 0) + { + closeDbs(); + ::usleep(1000000); // 1 sec delay + QLS_LOG(error, "Previoius BDB store initialization failed, retrying (" << bdbRetryCnt << " of " << retryMax << ")..."); + } + + try { + qpid::linearstore::journal::jdir::create_dir(getBdbBaseDir()); + + dbenv.reset(new DbEnv(0)); + dbenv->set_errpfx("linearstore"); + dbenv->set_lg_regionmax(256000); // default = 65000 + dbenv->open(getBdbBaseDir().c_str(), DB_THREAD | DB_CREATE | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_USE_ENVIRON | DB_RECOVER, 0); + + // Databases are constructed here instead of the constructor so that the DB_RECOVER flag can be used + // against the database environment. Recover can only be performed if no databases have been created + // against the environment at the time of recovery, as recovery invalidates the environment. 
+ queueDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(queueDb); + configDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(configDb); + exchangeDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(exchangeDb); + mappingDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(mappingDb); + bindingDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(bindingDb); + generalDb.reset(new Db(dbenv.get(), 0)); + dbs.push_back(generalDb); + + TxnCtxt txn; + txn.begin(dbenv.get(), false); + try { + open(queueDb, txn.get(), "queues.db", false); + open(configDb, txn.get(), "config.db", false); + open(exchangeDb, txn.get(), "exchanges.db", false); + open(mappingDb, txn.get(), "mappings.db", true); + open(bindingDb, txn.get(), "bindings.db", true); + open(generalDb, txn.get(), "general.db", false); + txn.commit(); + } catch (...) { txn.abort(); throw; } + // NOTE: during normal initialization, agent == 0 because the store is initialized before the management infrastructure. + // However during a truncated initialization in a cluster, agent != 0. We always pass 0 as the agent for the + // TplStore to keep things consistent in a cluster. See https://bugzilla.redhat.com/show_bug.cgi?id=681026 + tplStorePtr.reset(new TplJournalImpl(broker->getTimer(), "TplStore", getTplBaseDir(), jrnlLog, defJournalGetEventsTimeout, defJournalFlushTimeout, 0)); + isInit = true; + } catch (const DbException& e) { + if (e.get_errno() == DB_VERSION_MISMATCH) + { + QLS_LOG(error, "Database environment mismatch: This version of db4 does not match that which created the store database.: " << e.what()); + THROW_STORE_EXCEPTION_2("Database environment mismatch: This version of db4 does not match that which created the store database. " + "(If recovery is not important, delete the contents of the store directory. 
Otherwise, try upgrading the database using " + "db_upgrade or using db_recover - but the db4-utils package must also be installed to use these utilities.)", e); + } + QLS_LOG(error, "BDB exception occurred while initializing store: " << e.what()); + if (bdbRetryCnt >= retryMax) + THROW_STORE_EXCEPTION_2("BDB exception occurred while initializing store", e); + } catch (const StoreException&) { + throw; + } catch (const qpid::linearstore::journal::jexception& e) { + QLS_LOG(error, "Journal Exception occurred while initializing store: " << e); + THROW_STORE_EXCEPTION_2("Journal Exception occurred while initializing store", e.what()); + } catch (...) { + QLS_LOG(error, "Unknown exception occurred while initializing store."); + throw; + } + } while (!isInit); + + efpMgr.reset(new qpid::linearstore::journal::EmptyFilePoolManager(getStoreTopLevelDir(), + defaultEfpPartitionNumber, + defaultEfpFileSize_kib, + overwriteBeforeReturnFlag, + truncateFlag, + jrnlLog)); + efpMgr->findEfpPartitions(); +} + +void MessageStoreImpl::finalize() +{ + if (tplStorePtr.get() && tplStorePtr->is_ready()) tplStorePtr->stop(true); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + for (JournalListMapItr i = journalList.begin(); i != journalList.end(); i++) + { + JournalImpl* jQueue = i->second; + jQueue->resetDeleteCallback(); + if (jQueue->is_ready()) jQueue->stop(true); + } + } + + if (mgmtObject.get() != 0) { + mgmtObject->resourceDestroy(); + mgmtObject.reset(); + } +} + +void MessageStoreImpl::truncateInit() +{ + if (isInit) { + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + if (journalList.size()) { // check no queues exist + std::ostringstream oss; + oss << "truncateInit() called with " << journalList.size() << " queues still in existence"; + THROW_STORE_EXCEPTION(oss.str()); + } + } + closeDbs(); + dbs.clear(); + if (tplStorePtr->is_ready()) tplStorePtr->stop(true); + dbenv->close(0); + isInit = false; + } + + 
qpid::linearstore::journal::jdir::delete_dir(getBdbBaseDir()); + + // TODO: Linearstore: harvest all discarded journal files into the empty file pool(s). + qpid::linearstore::journal::jdir::delete_dir(getJrnlBaseDir()); + qpid::linearstore::journal::jdir::delete_dir(getTplBaseDir()); + QLS_LOG(info, "Store directory " << getStoreTopLevelDir() << " was truncated."); +} + +void MessageStoreImpl::chkTplStoreInit() +{ + // Prevent multiple threads from late-initializing the TPL + qpid::sys::Mutex::ScopedLock sl(tplInitLock); + if (!tplStorePtr->is_ready()) { + qpid::linearstore::journal::jdir::create_dir(getTplBaseDir()); + tplStorePtr->initialize(getEmptyFilePool(defaultEfpPartitionNumber, defaultEfpFileSize_kib), tplWCacheNumPages, tplWCachePgSizeSblks); + if (mgmtObject.get() != 0) mgmtObject->set_tplIsInitialized(true); + } +} + +void MessageStoreImpl::open(db_ptr db_, + DbTxn* txn_, + const char* file_, + bool dupKey_) +{ + if(dupKey_) db_->set_flags(DB_DUPSORT); + db_->open(txn_, file_, 0, DB_BTREE, DB_CREATE | DB_THREAD, 0); +} + +void MessageStoreImpl::closeDbs() +{ + for (std::list<db_ptr >::iterator i = dbs.begin(); i != dbs.end(); i++) { + (*i)->close(0); + } + dbs.clear(); +} + +MessageStoreImpl::~MessageStoreImpl() +{ + finalize(); + try { + closeDbs(); + } catch (const DbException& e) { + QLS_LOG(error, "Error closing BDB databases: " << e.what()); + } catch (const qpid::linearstore::journal::jexception& e) { + QLS_LOG(error, "Error: " << e.what()); + } catch (const std::exception& e) { + QLS_LOG(error, "Error: " << e.what()); + } catch (...) 
{ + QLS_LOG(error, "Unknown error in MessageStoreImpl::~MessageStoreImpl()"); + } + + if (mgmtObject.get() != 0) { + mgmtObject->resourceDestroy(); + mgmtObject.reset(); + } +} + +void MessageStoreImpl::create(qpid::broker::PersistableQueue& queue_, + const qpid::framing::FieldTable& args_) +{ + QLS_LOG(info, "*** MessageStoreImpl::create() queue=\"" << queue_.getName() << "\""); // DEBUG + checkInit(); + if (queue_.getPersistenceId()) { + THROW_STORE_EXCEPTION("Queue already created: " + queue_.getName()); + } + JournalImpl* jQueue = 0; + + if (queue_.getName().size() == 0) + { + QLS_LOG(error, "Cannot create store for empty (null) queue name - queue create ignored."); + return; + } + + jQueue = new JournalImpl(broker->getTimer(), queue_.getName(), getJrnlDir(queue_.getName()), jrnlLog, + defJournalGetEventsTimeout, defJournalFlushTimeout, agent, + boost::bind(&MessageStoreImpl::journalDeleted, this, _1)); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList[queue_.getName()]=jQueue; + } + + queue_.setExternalQueueStore(dynamic_cast<qpid::broker::ExternalQueueStore*>(jQueue)); + try { + jQueue->initialize(getEmptyFilePool(args_), wCacheNumPages, wCachePgSizeSblks); + } catch (const qpid::linearstore::journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue_.getName() + ": create() failed: " + e.what()); + } + try { + if (!create(queueDb, queueIdSequence, queue_)) { + THROW_STORE_EXCEPTION("Queue already exists: " + queue_.getName()); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating queue named " + queue_.getName(), e); + } +} + +qpid::linearstore::journal::EmptyFilePool* +MessageStoreImpl::getEmptyFilePool(const qpid::linearstore::journal::efpPartitionNumber_t efpPartitionNumber_, + const qpid::linearstore::journal::efpDataSize_kib_t efpFileSizeKib_) { + qpid::linearstore::journal::EmptyFilePool* efpp = efpMgr->getEmptyFilePool(efpPartitionNumber_, efpFileSizeKib_); + if (efpp == 0) { + 
std::ostringstream oss; + oss << "Partition=" << efpPartitionNumber_ << "; EfpFileSize=" << efpFileSizeKib_ << " KiB"; + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR_EFP_NOEFP, oss.str(), "MessageStoreImpl", "getEmptyFilePool"); + } + return efpp; +} + +qpid::linearstore::journal::EmptyFilePool* +MessageStoreImpl::getEmptyFilePool(const qpid::framing::FieldTable& args_) { + qpid::framing::FieldTable::ValuePtr value; + qpid::linearstore::journal::efpPartitionNumber_t localEfpPartition = defaultEfpPartitionNumber; + value = args_.get("qpid.efp_partition_num"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>()) { + localEfpPartition = chkEfpPartition((uint32_t)value->get<int>(), "qpid.efp_partition_num"); + } + + qpid::linearstore::journal::efpDataSize_kib_t localEfpFileSizeKib = defaultEfpFileSize_kib; + value = args_.get("qpid.efp_pool_file_size"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>()) { + localEfpFileSizeKib = chkEfpFileSizeKiB((uint32_t)value->get<int>(), "qpid.efp_pool_file_size"); + } + return getEmptyFilePool(localEfpPartition, localEfpFileSizeKib); +} + +void MessageStoreImpl::destroy(qpid::broker::PersistableQueue& queue_) +{ + QLS_LOG(info, "*** MessageStoreImpl::destroy() queue=\"" << queue_.getName() << "\""); + checkInit(); + destroy(queueDb, queue_); + deleteBindingsForQueue(queue_); + qpid::broker::ExternalQueueStore* eqs = queue_.getExternalQueueStore(); + if (eqs) { + JournalImpl* jQueue = static_cast<JournalImpl*>(eqs); + jQueue->delete_jrnl_files(); + queue_.setExternalQueueStore(0); // will delete the journal if exists + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList.erase(queue_.getName()); + } + } +} + +void MessageStoreImpl::create(const qpid::broker::PersistableExchange& exchange_, + const qpid::framing::FieldTable& /*args_*/) +{ + checkInit(); + if (exchange_.getPersistenceId()) { + THROW_STORE_EXCEPTION("Exchange already created: 
" + exchange_.getName()); + } + try { + if (!create(exchangeDb, exchangeIdSequence, exchange_)) { + THROW_STORE_EXCEPTION("Exchange already exists: " + exchange_.getName()); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating exchange named " + exchange_.getName(), e); + } +} + +void MessageStoreImpl::destroy(const qpid::broker::PersistableExchange& exchange) +{ + checkInit(); + destroy(exchangeDb, exchange); + //need to also delete bindings + IdDbt key(exchange.getPersistenceId()); + bindingDb->del(0, &key, DB_AUTO_COMMIT); +} + +void MessageStoreImpl::create(const qpid::broker::PersistableConfig& general_) +{ + checkInit(); + if (general_.getPersistenceId()) { + THROW_STORE_EXCEPTION("General configuration item already created"); + } + try { + if (!create(generalDb, generalIdSequence, general_)) { + THROW_STORE_EXCEPTION("General configuration already exists"); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION_2("Error creating general configuration", e); + } +} + +void MessageStoreImpl::destroy(const qpid::broker::PersistableConfig& general_) +{ + checkInit(); + destroy(generalDb, general_); +} + +bool MessageStoreImpl::create(db_ptr db_, + IdSequence& seq_, + const qpid::broker::Persistable& p_) +{ + uint64_t id (seq_.next()); + Dbt key(&id, sizeof(id)); + BufferValue value (p_); + + int status; + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + status = db_->put(txn.get(), &key, &value, DB_NOOVERWRITE); + txn.commit(); + } catch (...) 
{ + txn.abort(); + throw; + } + if (status == DB_KEYEXIST) { + return false; + } else { + p_.setPersistenceId(id); + return true; + } +} + +void MessageStoreImpl::destroy(db_ptr db, const qpid::broker::Persistable& p_) +{ + qpid::sys::Mutex::ScopedLock sl(bdbLock); + IdDbt key(p_.getPersistenceId()); + db->del(0, &key, DB_AUTO_COMMIT); +} + + +void MessageStoreImpl::bind(const qpid::broker::PersistableExchange& e_, + const qpid::broker::PersistableQueue& q_, + const std::string& k_, + const qpid::framing::FieldTable& a_) +{ + checkInit(); + IdDbt key(e_.getPersistenceId()); + BindingDbt value(e_, q_, k_, a_); + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + put(bindingDb, txn.get(), key, value); + txn.commit(); + } catch (...) { + txn.abort(); + throw; + } +} + +void MessageStoreImpl::unbind(const qpid::broker::PersistableExchange& e_, + const qpid::broker::PersistableQueue& q_, + const std::string& k_, + const qpid::framing::FieldTable& /*a_*/) +{ + checkInit(); + deleteBinding(e_, q_, k_); +} + +void MessageStoreImpl::recover(qpid::broker::RecoveryManager& registry_) +{ + checkInit(); + txn_list prepared; + recoverLockedMappings(prepared); + + std::ostringstream oss; + oss << "Recovered transaction prepared list:"; + for (txn_list::iterator i = prepared.begin(); i != prepared.end(); i++) { + oss << std::endl << " " << qpid::linearstore::journal::jcntl::str2hexnum(i->xid); + } + QLS_LOG(debug, oss.str()); + + queue_index queues;//id->queue + exchange_index exchanges;//id->exchange + message_index messages;//id->message + + TxnCtxt txn; + txn.begin(dbenv.get(), false); + try { + //read all queues, calls recoversMessages for each queue + recoverQueues(txn, registry_, queues, prepared, messages); + + //recover exchange & bindings: + recoverExchanges(txn, registry_, exchanges); + recoverBindings(txn, exchanges, queues); + + //recover general-purpose configuration + recoverGeneral(txn, registry_); + + txn.commit(); + } catch (const DbException& e) { + 
txn.abort(); + THROW_STORE_EXCEPTION_2("Error on recovery", e); + } catch (...) { + txn.abort(); + throw; + } + + //recover transactions: + qpid::linearstore::journal::txn_map& txn_map_ref = tplStorePtr->get_txn_map(); + for (txn_list::iterator i = prepared.begin(); i != prepared.end(); i++) { + const PreparedTransaction pt = *i; + if (mgmtObject.get() != 0) { + mgmtObject->inc_tplTransactionDepth(); + mgmtObject->inc_tplTxnPrepares(); + } + + std::string xid = pt.xid; + qpid::linearstore::journal::txn_data_list_t tdl = txn_map_ref.get_tdata_list(xid); + if (tdl.size() == 0) THROW_STORE_EXCEPTION("XID not found in txn_map"); + qpid::linearstore::journal::txn_op_stats_t txn_op_stats(tdl); + bool commitFlag = txn_op_stats.abortCnt == 0; + + // If a record is found that is dequeued but not committed/aborted from tplStore, then a complete() call + // was interrupted part way through committing/aborting the impacted queues. Complete this process. + bool incomplTplTxnFlag = txn_op_stats.deqCnt > 0; + + if (txn_op_stats.tpcCnt > 0) { + // Dtx (2PC) transaction + TPCTxnCtxt* tpcc = new TPCTxnCtxt(xid, &messageIdSequence); + std::auto_ptr<qpid::broker::TPCTransactionContext> txn(tpcc); + tpcc->recoverDtok(txn_op_stats.rid, xid); + tpcc->prepare(tplStorePtr.get()); + + qpid::broker::RecoverableTransaction::shared_ptr dtx; + if (!incomplTplTxnFlag) dtx = registry_.recoverTransaction(xid, txn); + if (pt.enqueues.get()) { + for (LockedMappings::iterator j = pt.enqueues->begin(); j != pt.enqueues->end(); j++) { + tpcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + if (!incomplTplTxnFlag) dtx->enqueue(queues[j->first], messages[j->second]); + } + } + if (pt.dequeues.get()) { + for (LockedMappings::iterator j = pt.dequeues->begin(); j != pt.dequeues->end(); j++) { + tpcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + if (!incomplTplTxnFlag) dtx->dequeue(queues[j->first], messages[j->second]); + } + } + + if (incomplTplTxnFlag) { + 
tpcc->complete(commitFlag); + } + } else { + // Local (1PC) transaction + boost::shared_ptr<TxnCtxt> opcc(new TxnCtxt(xid, &messageIdSequence)); + opcc->recoverDtok(txn_op_stats.rid, xid); + opcc->prepare(tplStorePtr.get()); + + if (pt.enqueues.get()) { + for (LockedMappings::iterator j = pt.enqueues->begin(); j != pt.enqueues->end(); j++) { + opcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + } + } + if (pt.dequeues.get()) { + for (LockedMappings::iterator j = pt.dequeues->begin(); j != pt.dequeues->end(); j++) { + opcc->addXidRecord(queues[j->first]->getExternalQueueStore()); + } + } + if (incomplTplTxnFlag) { + opcc->complete(commitFlag); + } else { + completed(*opcc.get(), commitFlag); + } + } + + } + registry_.recoveryComplete(); +} + +void MessageStoreImpl::recoverQueues(TxnCtxt& txn, + qpid::broker::RecoveryManager& registry, + queue_index& queue_index, + txn_list& prepared, + message_index& messages) +{ + Cursor queues; + queues.open(queueDb, txn.get()); + + uint64_t maxQueueId(1); + + IdDbt key; + Dbt value; + //read all queues + while (queues.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create a Queue instance + qpid::broker::RecoverableQueue::shared_ptr queue = registry.recoverQueue(buffer); + //set the persistenceId and update max as required + queue->setPersistenceId(key.id); + + const std::string queueName = queue->getName().c_str(); + JournalImpl* jQueue = 0; + if (queueName.size() == 0) + { + QLS_LOG(error, "Cannot recover empty (null) queue name - ignoring and attempting to continue."); + break; + } + jQueue = new JournalImpl(broker->getTimer(), queueName, getJrnlDir(queueName),jrnlLog, + defJournalGetEventsTimeout, defJournalFlushTimeout, agent, + boost::bind(&MessageStoreImpl::journalDeleted, this, _1)); + { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList[queueName] = jQueue; + } + 
queue->setExternalQueueStore(dynamic_cast<qpid::broker::ExternalQueueStore*>(jQueue)); + + try + { + long rcnt = 0L; // recovered msg count + long idcnt = 0L; // in-doubt msg count + uint64_t thisHighestRid = 0ULL; + jQueue->recover(boost::dynamic_pointer_cast<qpid::linearstore::journal::EmptyFilePoolManager>(efpMgr), wCacheNumPages, wCachePgSizeSblks, &prepared, thisHighestRid, key.id); + + // Check for changes to queue store settings qpid.file_count and qpid.file_size resulting + // from recovery of a store that has had its size changed externally by the resize utility. + // If so, update the queue store settings so that QMF queries will reflect the new values. + // TODO: Update this for new settings, as qpid.file_count and qpid.file_size no longer apply +/* + const qpid::framing::FieldTable& storeargs = queue->getSettings().storeSettings; + qpid::framing::FieldTable::ValuePtr value; + value = storeargs.get("qpid.file_count"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>() && (uint16_t)value->get<int>() != jQueue->num_jfiles()) { + queue->addArgument("qpid.file_count", jQueue->num_jfiles()); + } + value = storeargs.get("qpid.file_size"); + if (value.get() != 0 && !value->empty() && value->convertsTo<int>() && (uint32_t)value->get<int>() != jQueue->jfsize_sblks()/JRNL_RMGR_PAGE_SIZE) { + queue->addArgument("qpid.file_size", jQueue->jfsize_sblks()/JRNL_RMGR_PAGE_SIZE); + } +*/ + + if (highestRid == 0ULL) + highestRid = thisHighestRid; + else if (thisHighestRid - highestRid < 0x8000000000000000ULL) // RFC 1982 comparison for unsigned 64-bit + highestRid = thisHighestRid; + recoverMessages(txn, registry, queue, prepared, messages, rcnt, idcnt); + QLS_LOG(info, "Recovered queue \"" << queueName << "\": " << rcnt << " messages recovered; " << idcnt << " messages in-doubt."); + jQueue->recover_complete(); // start journal. 
+ } catch (const qpid::linearstore::journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queueName + ": recoverQueues() failed: " + e.what()); + } + //read all messages: done on a per queue basis if using Journal + + queue_index[key.id] = queue; + maxQueueId = std::max(key.id, maxQueueId); + } + + // NOTE: highestRid is set by both recoverQueues() and recoverTplStore() as + // the messageIdSequence is used for both queue journals and the tpl journal. + messageIdSequence.reset(highestRid + 1); + QLS_LOG(info, "Most recent persistence id found: 0x" << std::hex << highestRid << std::dec); + + queueIdSequence.reset(maxQueueId + 1); +} + + +void MessageStoreImpl::recoverExchanges(TxnCtxt& txn_, + qpid::broker::RecoveryManager& registry_, + exchange_index& index_) +{ + //TODO: this is a copy&paste from recoverQueues - refactor! + Cursor exchanges; + exchanges.open(exchangeDb, txn_.get()); + + uint64_t maxExchangeId(1); + IdDbt key; + Dbt value; + //read all exchanges + while (exchanges.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create a Exchange instance + qpid::broker::RecoverableExchange::shared_ptr exchange = registry_.recoverExchange(buffer); + if (exchange) { + //set the persistenceId and update max as required + exchange->setPersistenceId(key.id); + index_[key.id] = exchange; + QLS_LOG(info, "Recovered exchange \"" << exchange->getName() << '"'); + } + maxExchangeId = std::max(key.id, maxExchangeId); + } + exchangeIdSequence.reset(maxExchangeId + 1); +} + +void MessageStoreImpl::recoverBindings(TxnCtxt& txn_, + exchange_index& exchanges_, + queue_index& queues_) +{ + Cursor bindings; + bindings.open(bindingDb, txn_.get()); + + IdDbt key; + Dbt value; + while (bindings.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + QLS_LOG(error, "Not enough data for binding: " << 
buffer.available()); + THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + std::string queueName; + std::string routingkey; + qpid::framing::FieldTable args; + buffer.getShortString(queueName); + buffer.getShortString(routingkey); + buffer.get(args); + exchange_index::iterator exchange = exchanges_.find(key.id); + queue_index::iterator queue = queues_.find(queueId); + if (exchange != exchanges_.end() && queue != queues_.end()) { + //could use the recoverable queue here rather than the name... + exchange->second->bind(queueName, routingkey, args); + QLS_LOG(info, "Recovered binding exchange=" << exchange->second->getName() + << " key=" << routingkey + << " queue=" << queueName); + } else { + //stale binding, delete it + QLS_LOG(warning, "Deleting stale binding"); + bindings->del(0); + } + } +} + +void MessageStoreImpl::recoverGeneral(TxnCtxt& txn_, + qpid::broker::RecoveryManager& registry_) +{ + Cursor items; + items.open(generalDb, txn_.get()); + + uint64_t maxGeneralId(1); + IdDbt key; + Dbt value; + //read all items + while (items.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + //create instance + qpid::broker::RecoverableConfig::shared_ptr config = registry_.recoverConfig(buffer); + //set the persistenceId and update max as required + config->setPersistenceId(key.id); + maxGeneralId = std::max(key.id, maxGeneralId); + } + generalIdSequence.reset(maxGeneralId + 1); +} + +void MessageStoreImpl::recoverMessages(TxnCtxt& /*txn*/, + qpid::broker::RecoveryManager& recovery, + qpid::broker::RecoverableQueue::shared_ptr& queue, + txn_list& prepared, + message_index& messages, + long& rcnt, + long& idcnt) +{ + size_t preambleLength = sizeof(uint32_t)/*header size*/; + + JournalImpl* jc = static_cast<JournalImpl*>(queue->getExternalQueueStore()); + unsigned msg_count = 0; + + // TODO: This optimization to skip reading if there are no enqueued messages to 
read + // breaks the python system test in phase 6 with "Exception: Cannot write lock file" + // Figure out what is breaking. + //bool read = jc->get_enq_cnt() > 0; + bool read = true; + + void* dbuff = NULL; + size_t dbuffSize = 0; + void* xidbuff = NULL; + size_t xidbuffSize = 0; + bool transientFlag = false; + bool externalFlag = false; + DataTokenImpl dtok; + dtok.set_wstate(DataTokenImpl::NONE); + qpid::linearstore::journal::txn_map& txn_map_ref = tplStorePtr->get_txn_map(); + + // Read the message from the Journal. + try { + unsigned aio_sleep_cnt = 0; + while (read) { + qpid::linearstore::journal::iores res = jc->read_data_record(&dbuff, dbuffSize, &xidbuff, xidbuffSize, transientFlag, externalFlag, &dtok, false); + + switch (res) + { + case qpid::linearstore::journal::RHM_IORES_SUCCESS: { + msg_count++; + qpid::broker::RecoverableMessage::shared_ptr msg; + char* data = (char*)dbuff; + + unsigned headerSize; + if (externalFlag) { + msg = getExternMessage(recovery, dtok.rid(), headerSize); // large message external to jrnl + } else { + headerSize = qpid::framing::Buffer(data, preambleLength).getLong(); + qpid::framing::Buffer headerBuff(data+ preambleLength, headerSize); + msg = recovery.recoverMessage(headerBuff); + } + msg->setPersistenceId(dtok.rid()); + // At some future point if delivery attempts are stored, then this call would + // become optional depending on that information. 
+ msg->setRedelivered(); + // Reset the TTL for the recovered message + msg->computeExpiration(); + + uint32_t contentOffset = headerSize + preambleLength; + uint64_t contentSize = dbuffSize - contentOffset; + if (msg->loadContent(contentSize) && !externalFlag) { + //now read the content + qpid::framing::Buffer contentBuff(data + contentOffset, contentSize); + msg->decodeContent(contentBuff); + } + + PreparedTransaction::list::iterator i = PreparedTransaction::getLockedPreparedTransaction(prepared, queue->getPersistenceId(), dtok.rid()); + if (i == prepared.end()) { // not in prepared list + rcnt++; + queue->recover(msg); + } else { + uint64_t rid = dtok.rid(); + std::string xid(i->xid); + qpid::linearstore::journal::txn_data_list_t tdl = txn_map_ref.get_tdata_list(xid); + if (tdl.size() == 0) THROW_STORE_EXCEPTION("XID not found in txn_map"); + qpid::linearstore::journal::txn_op_stats_t txn_op_stats(tdl); + if (txn_op_stats.deqCnt > 0 || txn_op_stats.tpcCnt == 0) { + if (jc->is_enqueued(rid, true)) { + // Enqueue is non-tx, dequeue tx + assert(jc->is_locked(rid)); // This record MUST be locked by a txn dequeue + if (txn_op_stats.abortCnt > 0) { + rcnt++; + queue->recover(msg); // recover message in abort case only + } + } else { + // Enqueue and/or dequeue tx + qpid::linearstore::journal::txn_map& tmap = jc->get_txn_map(); + qpid::linearstore::journal::txn_data_list_t txnList = tmap.get_tdata_list(xid); // txnList will be empty if xid not found + bool enq = false; + bool deq = false; + for (qpid::linearstore::journal::tdl_itr_t j = txnList.begin(); j<txnList.end(); j++) { + if (j->enq_flag_ && j->rid_ == rid) + enq = true; + else if (!j->enq_flag_ && j->drid_ == rid) + deq = true; + } + if (enq && !deq && txn_op_stats.abortCnt == 0) { + rcnt++; + queue->recover(msg); // recover txn message in commit case only + } + } + } else { + idcnt++; + messages[rid] = msg; + } + } + + dtok.reset(); + dtok.set_wstate(DataTokenImpl::NONE); + + if (xidbuff) { + ::free(xidbuff); 
+ xidbuff = NULL; + } + if (dbuff) { + ::free(dbuff); + dbuff = NULL; + } + aio_sleep_cnt = 0; + break; + } + case qpid::linearstore::journal::RHM_IORES_PAGE_AIOWAIT: + if (++aio_sleep_cnt > MAX_AIO_SLEEPS) + THROW_STORE_EXCEPTION("Timeout waiting for AIO in MessageStoreImpl::recoverMessages()"); + ::usleep(AIO_SLEEP_TIME_US); + break; + case qpid::linearstore::journal::RHM_IORES_EMPTY: + read = false; + break; // done with all messages. (add call in jrnl to test that _emap is empty.) + default: + std::ostringstream oss; + oss << "recoverMessages(): Queue: " << queue->getName() << ": Unexpected return from journal read: " << qpid::linearstore::journal::iores_str(res); + THROW_STORE_EXCEPTION(oss.str()); + } // switch + } // while + } catch (const qpid::linearstore::journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue->getName() + ": recoverMessages() failed: " + e.what()); + } +} + +qpid::broker::RecoverableMessage::shared_ptr MessageStoreImpl::getExternMessage(qpid::broker::RecoveryManager& /*recovery*/, + uint64_t /*messageId*/, + unsigned& /*headerSize*/) +{ + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "getExternMessage"); +} + +int MessageStoreImpl::enqueueMessage(TxnCtxt& txn_, + IdDbt& msgId_, + qpid::broker::RecoverableMessage::shared_ptr& msg_, + queue_index& index_, + txn_list& prepared_, + message_index& messages_) +{ + Cursor mappings; + mappings.open(mappingDb, txn_.get()); + + IdDbt value; + + int count(0); + for (int status = mappings->get(&msgId_, &value, DB_SET); status == 0; status = mappings->get(&msgId_, &value, DB_NEXT_DUP)) { + if (index_.find(value.id) == index_.end()) { + QLS_LOG(warning, "Recovered message for queue that no longer exists"); + mappings->del(0); + } else { + qpid::broker::RecoverableQueue::shared_ptr queue = index_[value.id]; + if (PreparedTransaction::isLocked(prepared_, value.id, msgId_.id)) { + messages_[msgId_.id] = msg_; + 
} else { + queue->recover(msg_); + } + count++; + } + } + mappings.close(); + return count; +} + + +void MessageStoreImpl::recoverTplStore() +{ + if (qpid::linearstore::journal::jdir::exists(tplStorePtr->jrnl_dir())) { + uint64_t thisHighestRid = 0ULL; + tplStorePtr->recover(boost::dynamic_pointer_cast<qpid::linearstore::journal::EmptyFilePoolManager>(efpMgr), tplWCacheNumPages, tplWCachePgSizeSblks, 0, thisHighestRid, 0); + if (highestRid == 0ULL) + highestRid = thisHighestRid; + else if (thisHighestRid - highestRid < 0x8000000000000000ULL) // RFC 1982 comparison for unsigned 64-bit + highestRid = thisHighestRid; + tplStorePtr->recover_complete(); // start TPL + } +} + +void MessageStoreImpl::recoverLockedMappings(txn_list& txns) +{ + if (!tplStorePtr->is_ready()) + recoverTplStore(); + std::vector<std::string> xidList; + tplStorePtr->get_txn_map().xid_list(xidList); + for (std::vector<std::string>::const_iterator i=xidList.begin(); i!=xidList.end(); ++i) { + LockedMappings::shared_ptr enq_ptr; + enq_ptr.reset(new LockedMappings); + LockedMappings::shared_ptr deq_ptr; + deq_ptr.reset(new LockedMappings); + txns.push_back(new PreparedTransaction(*i, enq_ptr, deq_ptr)); + } +} + +void MessageStoreImpl::collectPreparedXids(std::set<std::string>& xids) +{ + if (!tplStorePtr->is_ready()) { + recoverTplStore(); + } + std::vector<std::string> xidList; + tplStorePtr->get_txn_map().xid_list(xidList); + for (std::vector<std::string>::const_iterator i=xidList.begin(); i!=xidList.end(); ++i) { + qpid::linearstore::journal::txn_data_list_t tdl = tplStorePtr->get_txn_map().get_tdata_list(*i); + qpid::linearstore::journal::txn_op_stats_t txn_op_stats(tdl); + if (txn_op_stats.tpcCnt > 0) { + if (txn_op_stats.enqCnt - txn_op_stats.deqCnt > 0) { + xids.insert(*i); + } + } + } +} + +void MessageStoreImpl::stage(const boost::intrusive_ptr<qpid::broker::PersistableMessage>& /*msg*/) +{ + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR__NOTIMPL, 
"MessageStoreImpl", "stage"); +} + +void MessageStoreImpl::destroy(qpid::broker::PersistableMessage& /*msg*/) +{ + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "destroy"); +} + +void MessageStoreImpl::appendContent(const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& /*msg*/, + const std::string& /*data*/) +{ + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "appendContent"); +} + +void MessageStoreImpl::loadContent(const qpid::broker::PersistableQueue& /*queue*/, + const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& /*msg*/, + std::string& /*data*/, + uint64_t /*offset*/, + uint32_t /*length*/) +{ + throw qpid::linearstore::journal::jexception(qpid::linearstore::journal::jerrno::JERR__NOTIMPL, "MessageStoreImpl", "loadContent"); +} + +void MessageStoreImpl::flush(const qpid::broker::PersistableQueue& queue_) +{ +// QLS_LOG(info, "*** MessageStoreImpl::flush() queue=\"" << queue_.getName() << "\""); + if (queue_.getExternalQueueStore() == 0) return; + checkInit(); + std::string qn = queue_.getName(); + try { + JournalImpl* jc = static_cast<JournalImpl*>(queue_.getExternalQueueStore()); + if (jc) { + // TODO: check if this result should be used... 
+ /*mrg::journal::iores res =*/ jc->flush(false); + } + } catch (const qpid::linearstore::journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + qn + ": flush() failed: " + e.what() ); + } +} + +void MessageStoreImpl::enqueue(qpid::broker::TransactionContext* ctxt_, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg_, + const qpid::broker::PersistableQueue& queue_) +{ + //QLS_LOG(info, "*** MessageStoreImpl::enqueue() queue=\"" << queue_.getName() << "\""); + checkInit(); + uint64_t queueId (queue_.getPersistenceId()); + if (queueId == 0) { + THROW_STORE_EXCEPTION("Queue not created: " + queue_.getName()); + } + + TxnCtxt implicit; + TxnCtxt* txn = 0; + if (ctxt_) { + txn = check(ctxt_); + } else { + txn = &implicit; + } + + if (msg_->getPersistenceId() == 0) { + msg_->setPersistenceId(messageIdSequence.next()); + } + store(&queue_, txn, msg_); + + // add queue* to the txn map.. + if (ctxt_) txn->addXidRecord(queue_.getExternalQueueStore()); +} + +uint64_t MessageStoreImpl::msgEncode(std::vector<char>& buff_, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message_) +{ + uint32_t headerSize = message_->encodedHeaderSize(); + uint64_t size = message_->encodedSize() + sizeof(uint32_t); + try { buff_ = std::vector<char>(size); } // long + headers + content + catch (const std::exception& e) { + std::ostringstream oss; + oss << "Unable to allocate memory for encoding message; requested size: " << size << "; error: " << e.what(); + THROW_STORE_EXCEPTION(oss.str()); + } + qpid::framing::Buffer buffer(&buff_[0],size); + buffer.putLong(headerSize); + message_->encode(buffer); + return size; +} + +void MessageStoreImpl::store(const qpid::broker::PersistableQueue* queue_, + TxnCtxt* txn_, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message_) +{ + //QLS_LOG(info, "*** MessageStoreImpl::store() queue=\"" << queue_->getName() << "\""); + std::vector<char> buff; + uint64_t size = msgEncode(buff, message_); + 
+ try { + if (queue_) { + boost::intrusive_ptr<DataTokenImpl> dtokp(new DataTokenImpl); + dtokp->addRef(); + dtokp->setSourceMessage(message_); + dtokp->set_external_rid(true); + dtokp->set_rid(message_->getPersistenceId()); // set the messageID into the Journal header (record-id) + + JournalImpl* jc = static_cast<JournalImpl*>(queue_->getExternalQueueStore()); + if (txn_->getXid().empty()) { + jc->enqueue_data_record(&buff[0], size, size, dtokp.get(), !message_->isPersistent()); + } else { + jc->enqueue_txn_data_record(&buff[0], size, size, dtokp.get(), txn_->getXid(), txn_->isTPC(), !message_->isPersistent()); + } + } else { + THROW_STORE_EXCEPTION(std::string("MessageStoreImpl::store() failed: queue NULL.")); + } + } catch (const qpid::linearstore::journal::jexception& e) { + THROW_STORE_EXCEPTION(std::string("Queue ") + queue_->getName() + ": MessageStoreImpl::store() failed: " + + e.what()); + } +} + +void MessageStoreImpl::dequeue(qpid::broker::TransactionContext* ctxt_, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg_, + const qpid::broker::PersistableQueue& queue_) +{ + //QLS_LOG(info, "*** MessageStoreImpl::dequeue() queue=\"" << queue_.getName() << "\""); + checkInit(); + uint64_t queueId (queue_.getPersistenceId()); + uint64_t messageId (msg_->getPersistenceId()); + if (queueId == 0) { + THROW_STORE_EXCEPTION("Queue \"" + queue_.getName() + "\" has null queue Id (has not been created)"); + } + if (messageId == 0) { + THROW_STORE_EXCEPTION("Queue \"" + queue_.getName() + "\": Dequeuing message with null persistence Id."); + } + + TxnCtxt implicit; + TxnCtxt* txn = 0; + if (ctxt_) { + txn = check(ctxt_); + } else { + txn = &implicit; + } + + // add queue* to the txn map.. 
+ if (ctxt_) txn->addXidRecord(queue_.getExternalQueueStore()); + async_dequeue(ctxt_, msg_, queue_); + msg_->dequeueComplete(); +} + +void MessageStoreImpl::async_dequeue(qpid::broker::TransactionContext* ctxt_, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg_, + const qpid::broker::PersistableQueue& queue_) +{ + //QLS_LOG(info, "*** MessageStoreImpl::async_dequeue() queue=\"" << queue_.getName() << "\""); + boost::intrusive_ptr<DataTokenImpl> ddtokp(new DataTokenImpl); + ddtokp->setSourceMessage(msg_); + ddtokp->set_external_rid(true); + ddtokp->set_rid(messageIdSequence.next()); + ddtokp->set_dequeue_rid(msg_->getPersistenceId()); + ddtokp->set_wstate(DataTokenImpl::ENQ); + TxnCtxt* txn = 0; + std::string tid; + if (ctxt_) { + txn = check(ctxt_); + tid = txn->getXid(); + } + // Manually increase the ref count, as raw pointers are used beyond this point + ddtokp->addRef(); + try { + JournalImpl* jc = static_cast<JournalImpl*>(queue_.getExternalQueueStore()); + if (tid.empty()) { + jc->dequeue_data_record(ddtokp.get(), false); + } else { + jc->dequeue_txn_data_record(ddtokp.get(), tid, txn?txn->isTPC():false, false); + } + } catch (const qpid::linearstore::journal::jexception& e) { + ddtokp->release(); + THROW_STORE_EXCEPTION(std::string("Queue ") + queue_.getName() + ": async_dequeue() failed: " + e.what()); + } +} + +void MessageStoreImpl::completed(TxnCtxt& txn_, + bool commit_) +{ + try { + chkTplStoreInit(); // Late initialize (if needed) + + // Nothing to do if not prepared + if (txn_.getDtok()->is_enqueued()) { + txn_.incrDtokRef(); + DataTokenImpl* dtokp = txn_.getDtok(); + dtokp->set_dequeue_rid(dtokp->rid()); + dtokp->set_rid(messageIdSequence.next()); + tplStorePtr->dequeue_txn_data_record(txn_.getDtok(), txn_.getXid(), txn_.isTPC(), commit_); + } + txn_.complete(commit_); + if (mgmtObject.get() != 0) { + mgmtObject->dec_tplTransactionDepth(); + if (commit_) + mgmtObject->inc_tplTxnCommits(); + else + mgmtObject->inc_tplTxnAborts(); 
+ } + } catch (const std::exception& e) { + QLS_LOG(error, "Error completing xid " << qpid::linearstore::journal::jcntl::str2hexnum(txn_.getXid()) << ": " << e.what()); + throw; + } +} + +std::auto_ptr<qpid::broker::TransactionContext> MessageStoreImpl::begin() +{ + checkInit(); + // pass sequence number for c/a + return std::auto_ptr<qpid::broker::TransactionContext>(new TxnCtxt(&messageIdSequence)); +} + +std::auto_ptr<qpid::broker::TPCTransactionContext> MessageStoreImpl::begin(const std::string& xid_) +{ + checkInit(); + IdSequence* jtx = &messageIdSequence; + // pass sequence number for c/a + return std::auto_ptr<qpid::broker::TPCTransactionContext>(new TPCTxnCtxt(xid_, jtx)); +} + +void MessageStoreImpl::prepare(qpid::broker::TPCTransactionContext& ctxt_) +{ + checkInit(); + TxnCtxt* txn = dynamic_cast<TxnCtxt*>(&ctxt_); +//std::string xid=txn->getXid(); std::cout << "*** MessageStoreImpl::prepare() xid=" << std::hex; +//for (unsigned i=0; i<xid.length(); ++i) std::cout << "\\" << (int)xid.at(i); std::cout << " ***" << std::dec << std::endl; + if(!txn) throw qpid::broker::InvalidTransactionContextException(); + localPrepare(txn); +} + +void MessageStoreImpl::localPrepare(TxnCtxt* ctxt_) +{ +//std::string xid=ctxt_->getXid(); std::cout << "*** MessageStoreImpl::localPrepare() xid=" << std::hex; +//for (unsigned i=0; i<xid.length(); ++i) std::cout << "\\" << (int)xid.at(i); std::cout << " ***" << std::dec << std::endl; + try { + chkTplStoreInit(); // Late initialize (if needed) + + // This sync is required to ensure multi-queue atomicity - ie all txn data + // must hit the disk on *all* queues before the TPL prepare (enq) is written. 
+ ctxt_->sync(); + + ctxt_->incrDtokRef(); + DataTokenImpl* dtokp = ctxt_->getDtok(); + dtokp->set_external_rid(true); + dtokp->set_rid(messageIdSequence.next()); + char tpcFlag = static_cast<char>(ctxt_->isTPC()); + tplStorePtr->enqueue_txn_data_record(&tpcFlag, sizeof(char), sizeof(char), dtokp, ctxt_->getXid(), tpcFlag != 0, false); + ctxt_->prepare(tplStorePtr.get()); + // make sure all the data is written to disk before returning + ctxt_->sync(); + if (mgmtObject.get() != 0) { + mgmtObject->inc_tplTransactionDepth(); + mgmtObject->inc_tplTxnPrepares(); + } + } catch (const std::exception& e) { + QLS_LOG(error, "Error preparing xid " << ctxt_->getXid() << ": " << e.what()); + throw; + } +} + +void MessageStoreImpl::commit(qpid::broker::TransactionContext& ctxt_) +{ + checkInit(); + TxnCtxt* txn(check(&ctxt_)); + if (!txn->isTPC()) { + if (txn->impactedQueuesEmpty()) return; + localPrepare(dynamic_cast<TxnCtxt*>(txn)); + } + completed(*dynamic_cast<TxnCtxt*>(txn), true); +} + +void MessageStoreImpl::abort(qpid::broker::TransactionContext& ctxt_) +{ + checkInit(); + TxnCtxt* txn(check(&ctxt_)); + if (!txn->isTPC()) { + if (txn->impactedQueuesEmpty()) return; + localPrepare(dynamic_cast<TxnCtxt*>(txn)); + } + completed(*dynamic_cast<TxnCtxt*>(txn), false); +} + +TxnCtxt* MessageStoreImpl::check(qpid::broker::TransactionContext* ctxt_) +{ + TxnCtxt* txn = dynamic_cast<TxnCtxt*>(ctxt_); + if(!txn) throw qpid::broker::InvalidTransactionContextException(); + return txn; +} + +void MessageStoreImpl::put(db_ptr db_, + DbTxn* txn_, + Dbt& key_, + Dbt& value_) +{ + try { + int status = db_->put(txn_, &key_, &value_, DB_NODUPDATA); + if (status == DB_KEYEXIST) { + THROW_STORE_EXCEPTION("duplicate data"); + } else if (status) { + THROW_STORE_EXCEPTION(DbEnv::strerror(status)); + } + } catch (const DbException& e) { + THROW_STORE_EXCEPTION(e.what()); + } +} + +void MessageStoreImpl::deleteBindingsForQueue(const qpid::broker::PersistableQueue& queue_) +{ + TxnCtxt txn; + 
txn.begin(dbenv.get(), true); + try { + { + Cursor bindings; + bindings.open(bindingDb, txn.get()); + + IdDbt key; + Dbt value; + while (bindings.next(key, value)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + if (queue_.getPersistenceId() == queueId) { + bindings->del(0); + QLS_LOG(debug, "Deleting binding for " << queue_.getName() << " " << key.id << "->" << queueId); + } + } + } + txn.commit(); + } catch (const std::exception& e) { + txn.abort(); + THROW_STORE_EXCEPTION_2("Error deleting bindings", e.what()); + } catch (...) { + txn.abort(); + throw; + } + QLS_LOG(debug, "Deleted all bindings for " << queue_.getName() << ":" << queue_.getPersistenceId()); +} + +void MessageStoreImpl::deleteBinding(const qpid::broker::PersistableExchange& exchange_, + const qpid::broker::PersistableQueue& queue_, + const std::string& bkey_) +{ + TxnCtxt txn; + txn.begin(dbenv.get(), true); + try { + { + Cursor bindings; + bindings.open(bindingDb, txn.get()); + + IdDbt key(exchange_.getPersistenceId()); + Dbt value; + + for (int status = bindings->get(&key, &value, DB_SET); status == 0; status = bindings->get(&key, &value, DB_NEXT_DUP)) { + qpid::framing::Buffer buffer(reinterpret_cast<char*>(value.get_data()), value.get_size()); + if (buffer.available() < 8) { + THROW_STORE_EXCEPTION("Not enough data for binding"); + } + uint64_t queueId = buffer.getLongLong(); + if (queue_.getPersistenceId() == queueId) { + std::string q; + std::string k; + buffer.getShortString(q); + buffer.getShortString(k); + if (bkey_ == k) { + bindings->del(0); + QLS_LOG(debug, "Deleting binding for " << queue_.getName() << " " << key.id << "->" << queueId); + } + } + } + } + txn.commit(); + } catch (const std::exception& e) { + txn.abort(); + THROW_STORE_EXCEPTION_2("Error deleting bindings", e.what()); + } catch (...) 
{ + txn.abort(); + throw; + } +} + +std::string MessageStoreImpl::getStoreTopLevelDir() { + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir; + return dir.str(); +} + + +std::string MessageStoreImpl::getJrnlBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/jrnl2/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getBdbBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/dat2/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getTplBaseDir() +{ + std::ostringstream dir; + dir << storeDir << "/" << storeTopLevelDir << "/tpl2/" ; + return dir.str(); +} + +std::string MessageStoreImpl::getJrnlDir(const std::string& queueName_) +{ + std::ostringstream oss; + oss << getJrnlBaseDir() << queueName_; + return oss.str(); +} + +std::string MessageStoreImpl::getStoreDir() const { return storeDir; } + +void MessageStoreImpl::journalDeleted(JournalImpl& j_) { + qpid::sys::Mutex::ScopedLock sl(journalListLock); + journalList.erase(j_.id()); +} + +MessageStoreImpl::StoreOptions::StoreOptions(const std::string& name_) : + qpid::Options(name_), + truncateFlag(defTruncateFlag), + wCachePageSizeKib(defWCachePageSizeKib), + tplWCachePageSizeKib(defTplWCachePageSizeKib), + efpPartition(defEfpPartition), + efpFileSizeKib(defEfpFileSizeKib), + overwriteBeforeReturnFlag(defOverwriteBeforeReturnFlag) +{ + addOptions() + ("store-dir", qpid::optValue(storeDir, "DIR"), + "Store directory location for persistence (instead of using --data-dir value). " + "Required if --no-data-dir is also used.") + ("truncate", qpid::optValue(truncateFlag, "yes|no"), + "If yes|true|1, will truncate the store (discard any existing records). If no|false|0, will preserve " + "the existing store files for recovery.") + ("wcache-page-size", qpid::optValue(wCachePageSizeKib, "N"), + "Size of the pages in the write page cache in KiB. " + "Allowable values - powers of 2: 1, 2, 4, ... , 128. 
" + "Lower values decrease latency at the expense of throughput.") + ("tpl-wcache-page-size", qpid::optValue(tplWCachePageSizeKib, "N"), + "Size of the pages in the transaction prepared list write page cache in KiB. " + "Allowable values - powers of 2: 1, 2, 4, ... , 128. " + "Lower values decrease latency at the expense of throughput.") + ("efp-partition", qpid::optValue(efpPartition, "N"), + "Empty File Pool partition to use for finding empty journal files") + ("efp-file-size", qpid::optValue(efpFileSizeKib, "N"), + "Empty File Pool file size in KiB to use for journal files. Must be a multiple of 4 KiB.") + ("overwrite-before-return", qpid::optValue(overwriteBeforeReturnFlag, "yes|no"), + "If yes|true|1, will overwrite each store file with zeros before returning " + "it to the Empty File Pool. When not in use (the default), then old message data remains " + "in the file, but is overwritten on next use. This option should only be used where security " + "considerations justify it as it makes the store somewhat slower.") + ; +} + +}} diff --git a/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.h b/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.h new file mode 100644 index 0000000000..236fcf2cf8 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/MessageStoreImpl.h @@ -0,0 +1,351 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
    /**
     * Command-line / configuration options of the linear store.
     *
     * The constructor registers one option per field (except clusterName) and
     * initialises the non-string fields from the def* constants of
     * MessageStoreImpl.
     */
    struct StoreOptions : public qpid::Options {
        StoreOptions(const std::string& name="Linear Store Options");
        std::string clusterName;            // not registered as an option by the constructor
        std::string storeDir;               // --store-dir: store directory location
        bool truncateFlag;                  // --truncate: discard any existing records
        uint32_t wCachePageSizeKib;         // --wcache-page-size: write page cache page size in KiB
        uint32_t tplWCachePageSizeKib;      // --tpl-wcache-page-size: TPL write page cache page size in KiB
        uint16_t efpPartition;              // --efp-partition: Empty File Pool partition to use
        uint64_t efpFileSizeKib;            // --efp-file-size: Empty File Pool file size in KiB
        bool overwriteBeforeReturnFlag;     // --overwrite-before-return: zero files before returning them to the EFP
    };
typedef std::map<uint64_t, qpid::broker::RecoverableQueue::shared_ptr> queue_index; + typedef std::map<uint64_t, qpid::broker::RecoverableExchange::shared_ptr> exchange_index; + typedef std::map<uint64_t, qpid::broker::RecoverableMessage::shared_ptr> message_index; + + typedef LockedMappings::map txn_lock_map; + typedef boost::ptr_list<PreparedTransaction> txn_list; + + typedef std::map<std::string, JournalImpl*> JournalListMap; + typedef JournalListMap::iterator JournalListMapItr; + + // Default store settings + static const bool defTruncateFlag = false; + static const uint32_t defWCachePageSizeKib = QLS_WMGR_DEF_PAGE_SIZE_KIB; + static const uint32_t defTplWCachePageSizeKib = defWCachePageSizeKib / 8; + static const uint16_t defEfpPartition = 1; + static const uint64_t defEfpFileSizeKib = 512 * QLS_SBLK_SIZE_KIB; + static const bool defOverwriteBeforeReturnFlag = false; + static const std::string storeTopLevelDir; + + static qpid::sys::Duration defJournalGetEventsTimeout; + static qpid::sys::Duration defJournalFlushTimeout; + + std::list<db_ptr> dbs; + dbEnv_ptr dbenv; + db_ptr queueDb; + db_ptr configDb; + db_ptr exchangeDb; + db_ptr mappingDb; + db_ptr bindingDb; + db_ptr generalDb; + + // Pointer to Transaction Prepared List (TPL) journal instance + boost::shared_ptr<TplJournalImpl> tplStorePtr; + qpid::sys::Mutex tplInitLock; + JournalListMap journalList; + qpid::sys::Mutex journalListLock; + qpid::sys::Mutex bdbLock; + + IdSequence queueIdSequence; + IdSequence exchangeIdSequence; + IdSequence generalIdSequence; + IdSequence messageIdSequence; + std::string storeDir; + qpid::linearstore::journal::efpPartitionNumber_t defaultEfpPartitionNumber; + qpid::linearstore::journal::efpDataSize_kib_t defaultEfpFileSize_kib; + bool overwriteBeforeReturnFlag; + uint32_t wCachePgSizeSblks; + uint16_t wCacheNumPages; + uint32_t tplWCachePgSizeSblks; + uint16_t tplWCacheNumPages; + uint64_t highestRid; + bool isInit; + const char* envPath; + qpid::broker::Broker* broker; + 
JournalLogImpl jrnlLog; + boost::shared_ptr<qpid::linearstore::journal::EmptyFilePoolManager> efpMgr; + + qmf::org::apache::qpid::linearstore::Store::shared_ptr mgmtObject; + qpid::management::ManagementAgent* agent; + + + // Parameter validation and calculation + static uint32_t chkJrnlWrPageCacheSize(const uint32_t param, + const std::string& paramName/*, + const uint16_t jrnlFsizePgs*/); + static uint16_t getJrnlWrNumPages(const uint32_t wrPageSizeKiB); + static qpid::linearstore::journal::efpPartitionNumber_t chkEfpPartition(const qpid::linearstore::journal::efpPartitionNumber_t partition, + const std::string& paramName); + static qpid::linearstore::journal::efpDataSize_kib_t chkEfpFileSizeKiB(const qpid::linearstore::journal::efpDataSize_kib_t efpFileSizeKiB, + const std::string& paramName); + + void init(const bool truncateFlag); + + void recoverQueues(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + queue_index& index, + txn_list& locked, + message_index& messages); + void recoverMessages(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + queue_index& index, + txn_list& locked, + message_index& prepared); + void recoverMessages(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + qpid::broker::RecoverableQueue::shared_ptr& queue, + txn_list& locked, + message_index& prepared, + long& rcnt, + long& idcnt); + qpid::broker::RecoverableMessage::shared_ptr getExternMessage(qpid::broker::RecoveryManager& recovery, + uint64_t mId, + unsigned& headerSize); + void recoverExchanges(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery, + exchange_index& index); + void recoverBindings(TxnCtxt& txn, + exchange_index& exchanges, + queue_index& queues); + void recoverGeneral(TxnCtxt& txn, + qpid::broker::RecoveryManager& recovery); + int enqueueMessage(TxnCtxt& txn, + IdDbt& msgId, + qpid::broker::RecoverableMessage::shared_ptr& msg, + queue_index& index, + txn_list& locked, + message_index& prepared); + void recoverTplStore(); + void 
recoverLockedMappings(txn_list& txns); + TxnCtxt* check(qpid::broker::TransactionContext* ctxt); + uint64_t msgEncode(std::vector<char>& buff, const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message); + void store(const qpid::broker::PersistableQueue* queue, + TxnCtxt* txn, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& message); + void async_dequeue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + void destroy(db_ptr db, + const qpid::broker::Persistable& p); + bool create(db_ptr db, + IdSequence& seq, + const qpid::broker::Persistable& p); + void completed(TxnCtxt& txn, + bool commit); + void deleteBindingsForQueue(const qpid::broker::PersistableQueue& queue); + void deleteBinding(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key); + + void put(db_ptr db, + DbTxn* txn, + Dbt& key, + Dbt& value); + void open(db_ptr db, + DbTxn* txn, + const char* file, + bool dupKey); + void closeDbs(); + + // journal functions + void createJrnlQueue(const qpid::broker::PersistableQueue& queue); + std::string getJrnlDir(const std::string& queueName); + qpid::linearstore::journal::EmptyFilePool* getEmptyFilePool(const qpid::linearstore::journal::efpPartitionNumber_t p, const qpid::linearstore::journal::efpDataSize_kib_t s); + qpid::linearstore::journal::EmptyFilePool* getEmptyFilePool(const qpid::framing::FieldTable& args); + std::string getStoreTopLevelDir(); + std::string getJrnlBaseDir(); + std::string getBdbBaseDir(); + std::string getTplBaseDir(); + inline void checkInit() { + // TODO: change the default dir to ~/.qpidd + if (!isInit) { init("/tmp"); isInit = true; } + } + void chkTplStoreInit(); + + public: + typedef boost::shared_ptr<MessageStoreImpl> shared_ptr; + + MessageStoreImpl(qpid::broker::Broker* broker, const char* envpath = 0); + + virtual 
~MessageStoreImpl(); + + bool init(const qpid::Options* options); + + bool init(const std::string& dir, + qpid::linearstore::journal::efpPartitionNumber_t efpPartition = defEfpPartition, + qpid::linearstore::journal::efpDataSize_kib_t efpFileSizeKib = defEfpFileSizeKib, + const bool truncateFlag = false, + uint32_t wCachePageSize = defWCachePageSizeKib, + uint32_t tplWCachePageSize = defTplWCachePageSizeKib, + const bool overwriteBeforeReturnFlag_ = false); + + void truncateInit(); + + void initManagement (); + + void finalize(); + + // --- Implementation of qpid::broker::MessageStore --- + + void create(qpid::broker::PersistableQueue& queue, + const qpid::framing::FieldTable& args); + + void destroy(qpid::broker::PersistableQueue& queue); + + void create(const qpid::broker::PersistableExchange& queue, + const qpid::framing::FieldTable& args); + + void destroy(const qpid::broker::PersistableExchange& queue); + + void bind(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key, + const qpid::framing::FieldTable& args); + + void unbind(const qpid::broker::PersistableExchange& exchange, + const qpid::broker::PersistableQueue& queue, + const std::string& key, + const qpid::framing::FieldTable& args); + + void create(const qpid::broker::PersistableConfig& config); + + void destroy(const qpid::broker::PersistableConfig& config); + + void recover(qpid::broker::RecoveryManager& queues); + + void stage(const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg); + + void destroy(qpid::broker::PersistableMessage& msg); + + void appendContent(const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& msg, + const std::string& data); + + void loadContent(const qpid::broker::PersistableQueue& queue, + const boost::intrusive_ptr<const qpid::broker::PersistableMessage>& msg, + std::string& data, + uint64_t offset, + uint32_t length); + + void enqueue(qpid::broker::TransactionContext* ctxt, + const 
boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + + void dequeue(qpid::broker::TransactionContext* ctxt, + const boost::intrusive_ptr<qpid::broker::PersistableMessage>& msg, + const qpid::broker::PersistableQueue& queue); + + void flush(const qpid::broker::PersistableQueue& queue); + + inline uint32_t outstandingQueueAIO(const qpid::broker::PersistableQueue& /*queue*/) { return 0; }; // TODO: Deprecate this call + + void collectPreparedXids(std::set<std::string>& xids); + + std::auto_ptr<qpid::broker::TransactionContext> begin(); + + std::auto_ptr<qpid::broker::TPCTransactionContext> begin(const std::string& xid); + + void prepare(qpid::broker::TPCTransactionContext& ctxt); + + void localPrepare(TxnCtxt* ctxt); + + void commit(qpid::broker::TransactionContext& ctxt); + + void abort(qpid::broker::TransactionContext& ctxt); + + // --- Implementation of qpid::management::Managable --- + + qpid::management::ManagementObject::shared_ptr GetManagementObject (void) const + { return mgmtObject; } + + inline qpid::management::Manageable::status_t ManagementMethod (uint32_t, qpid::management::Args&, std::string&) + { return qpid::management::Manageable::STATUS_OK; } + + std::string getStoreDir() const; + + private: + void journalDeleted(JournalImpl&); + +}; // class MessageStoreImpl + +} // namespace msgstore +} // namespace mrg + +#endif // ifndef QPID_LINEARSTORE_MESSAGESTOREIMPL_H diff --git a/qpid/cpp/src/qpid/linearstore/PreparedTransaction.cpp b/qpid/cpp/src/qpid/linearstore/PreparedTransaction.cpp new file mode 100644 index 0000000000..1b92ca8c23 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/PreparedTransaction.cpp @@ -0,0 +1,81 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/PreparedTransaction.h" + +namespace qpid { +namespace linearstore { + +void LockedMappings::add(queue_id queue, message_id message) +{ + locked.push_back(std::make_pair(queue, message)); +} + +bool LockedMappings::isLocked(queue_id queue, message_id message) +{ + idpair op( std::make_pair(queue, message) ); + return find(locked.begin(), locked.end(), op) != locked.end(); +} + +void LockedMappings::add(LockedMappings::map& map, std::string& key, queue_id queue, message_id message) +{ + LockedMappings::map::iterator i = map.find(key); + if (i == map.end()) { + LockedMappings::shared_ptr ptr(new LockedMappings()); + i = map.insert(std::make_pair(key, ptr)).first; + } + i->second->add(queue, message); +} + +bool PreparedTransaction::isLocked(queue_id queue, message_id message) +{ + return (enqueues.get() && enqueues->isLocked(queue, message)) + || (dequeues.get() && dequeues->isLocked(queue, message)); +} + + +bool PreparedTransaction::isLocked(PreparedTransaction::list& txns, queue_id queue, message_id message) +{ + for (PreparedTransaction::list::iterator i = txns.begin(); i != txns.end(); i++) { + if (i->isLocked(queue, message)) { + return true; + } + } + return false; +} + +PreparedTransaction::list::iterator PreparedTransaction::getLockedPreparedTransaction(PreparedTransaction::list& txns, queue_id queue, message_id message) +{ + for 
(PreparedTransaction::list::iterator i = txns.begin(); i != txns.end(); i++) { + if (i->isLocked(queue, message)) { + return i; + } + } + return txns.end(); +} + +PreparedTransaction::PreparedTransaction(const std::string& _xid, + LockedMappings::shared_ptr _enqueues, + LockedMappings::shared_ptr _dequeues) + + : xid(_xid), enqueues(_enqueues), dequeues(_dequeues) {} + +}} diff --git a/qpid/cpp/src/qpid/linearstore/PreparedTransaction.h b/qpid/cpp/src/qpid/linearstore/PreparedTransaction.h new file mode 100644 index 0000000000..7b381ba3b9 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/PreparedTransaction.h @@ -0,0 +1,73 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_PREPAREDTRANSACTION_H +#define QPID_LINEARSTORE_PREPAREDTRANSACTION_H + +#include <boost/ptr_container/ptr_list.hpp> +#include <boost/shared_ptr.hpp> +#include <list> +#include <map> +#include <stdint.h> + +namespace qpid{ +namespace linearstore{ + +typedef uint64_t queue_id; +typedef uint64_t message_id; + +class LockedMappings +{ +public: + typedef boost::shared_ptr<LockedMappings> shared_ptr; + typedef std::map<std::string, shared_ptr> map; + typedef std::pair<queue_id, message_id> idpair; + typedef std::list<idpair>::iterator iterator; + + void add(queue_id queue, message_id message); + bool isLocked(queue_id queue, message_id message); + std::size_t size() { return locked.size(); } + iterator begin() { return locked.begin(); } + iterator end() { return locked.end(); } + + static void add(LockedMappings::map& map, std::string& key, queue_id queue, message_id message); + +private: + std::list<idpair> locked; +}; + +struct PreparedTransaction +{ + typedef boost::ptr_list<PreparedTransaction> list; + + const std::string xid; + const LockedMappings::shared_ptr enqueues; + const LockedMappings::shared_ptr dequeues; + + PreparedTransaction(const std::string& xid, LockedMappings::shared_ptr enqueues, LockedMappings::shared_ptr dequeues); + bool isLocked(queue_id queue, message_id message); + static bool isLocked(PreparedTransaction::list& txns, queue_id queue, message_id message); + static PreparedTransaction::list::iterator getLockedPreparedTransaction(PreparedTransaction::list& txns, queue_id queue, message_id message); +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_PREPAREDTRANSACTION_H diff --git a/qpid/cpp/src/qpid/linearstore/StoreException.h b/qpid/cpp/src/qpid/linearstore/StoreException.h new file mode 100644 index 0000000000..7a598a524f --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/StoreException.h @@ -0,0 +1,56 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_STOREEXCEPTION_H +#define QPID_LINEARSTORE_STOREEXCEPTION_H + +#include <boost/format.hpp> +#include "db-inc.h" + +namespace qpid{ +namespace linearstore{ + +class StoreException : public std::exception +{ + std::string text; +public: + StoreException(const std::string& _text) : text(_text) {} + StoreException(const std::string& _text, const DbException& cause) : text(_text + ": " + cause.what()) {} + virtual ~StoreException() throw() {} + virtual const char* what() const throw() { return text.c_str(); } +}; + +class StoreFullException : public StoreException +{ +public: + StoreFullException(const std::string& _text) : StoreException(_text) {} + StoreFullException(const std::string& _text, const DbException& cause) : StoreException(_text, cause) {} + virtual ~StoreFullException() throw() {} + +}; + +#define THROW_STORE_EXCEPTION(MESSAGE) throw StoreException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % __LINE__)) +#define THROW_STORE_EXCEPTION_2(MESSAGE, EXCEPTION) throw StoreException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % __LINE__), EXCEPTION) +#define THROW_STORE_FULL_EXCEPTION(MESSAGE) throw StoreFullException(boost::str(boost::format("%s (%s:%d)") % (MESSAGE) % __FILE__ % 
__LINE__)) + +}} + +#endif // ifndef QPID_LINEARSTORE_STOREEXCEPTION_H diff --git a/qpid/cpp/src/qpid/linearstore/StorePlugin.cpp b/qpid/cpp/src/qpid/linearstore/StorePlugin.cpp new file mode 100644 index 0000000000..cd8c7ed8a3 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/StorePlugin.cpp @@ -0,0 +1,97 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/broker/Broker.h" +#include "qpid/Plugin.h" +#include "qpid/Options.h" +#include "qpid/DataDir.h" +#include "qpid/linearstore/JournalLogImpl.h" +#include "qpid/linearstore/MessageStoreImpl.h" +#include "qpid/log/Statement.h" + +using qpid::linearstore::MessageStoreImpl; + +namespace qpid { +namespace broker { + +using namespace std; + +struct StorePlugin : public Plugin { + + MessageStoreImpl::StoreOptions options; + boost::shared_ptr<MessageStoreImpl> store; + + Options* getOptions() { return &options; } + + void earlyInitialize (Plugin::Target& target) + { + Broker* broker = dynamic_cast<Broker*>(&target); + if (!broker) return; + store.reset(new MessageStoreImpl(broker)); + const DataDir& dataDir = broker->getDataDir (); + if (options.storeDir.empty ()) + { + if (!dataDir.isEnabled ()) + throw Exception ("linearstore: If broker option --data-dir is blank or --no-data-dir is specified, linearstore option --store-dir must be present."); + + options.storeDir = dataDir.getPath (); + } else { + // Check if store dir is absolute. 
If not, make it absolute using qpidd executable dir as base + if (options.storeDir.at(0) != '/') { + char buf[1024]; + if (::getcwd(buf, sizeof(buf)-1) == 0) { + std::ostringstream oss; + oss << "linearstore: getcwd() unable to read current directory: errno=" << errno << " (" << strerror(errno) << ")"; + throw Exception(oss.str()); + } + std::string newStoreDir = std::string(buf) + "/" + options.storeDir; + std::ostringstream oss; + oss << "store-dir option \"" << options.storeDir << "\" is not absolute, changed to \"" << newStoreDir << "\""; + QLS_LOG(warning, oss.str()); + options.storeDir = newStoreDir; + } + } + store->init(&options); + boost::shared_ptr<qpid::broker::MessageStore> brokerStore(store); + broker->setStore(brokerStore); + target.addFinalizer(boost::bind(&StorePlugin::finalize, this)); + } + + void initialize(Plugin::Target& target) + { + Broker* broker = dynamic_cast<Broker*>(&target); + if (!broker) return; + if (!store) return; + QLS_LOG(info, "Enabling management instrumentation."); + store->initManagement(); + } + + void finalize() + { + store.reset(); + } + + const char* id() {return "StorePlugin";} +}; + +static StorePlugin instance; // Static initialization. + +}} // namespace qpid::broker diff --git a/qpid/cpp/src/qpid/linearstore/TxnCtxt.cpp b/qpid/cpp/src/qpid/linearstore/TxnCtxt.cpp new file mode 100644 index 0000000000..e26f0b8b6f --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/TxnCtxt.cpp @@ -0,0 +1,185 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

#include "qpid/linearstore/TxnCtxt.h"

#include "qpid/linearstore/DataTokenImpl.h"
#include "qpid/linearstore/JournalImpl.h"
#include "qpid/linearstore/StoreException.h"

namespace qpid{
namespace linearstore{

// Complete (commit or abort) this transaction on every journal it touched:
// sync first, then write the commit/abort record to each impacted queue's
// journal, forget the impacted queues, and finally do the same for the
// prepared-xid store journal if one was set.
void TxnCtxt::completeTxn(bool commit) {
    sync();
    for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++) {
        commitTxn(static_cast<JournalImpl*>(*i), commit);
    }
    impactedQueues.clear();
    if (preparedXidStorePtr)
        commitTxn(preparedXidStorePtr, commit);
}

// Write a commit (commit == true) or abort record for this transaction's xid
// to journal jc. Does nothing if jc is null or no loggedtx sequence was given.
// A journal jexception is rethrown as a StoreException.
void TxnCtxt::commitTxn(JournalImpl* jc, bool commit) {
    if (jc && loggedtx) { /* if using journal */
        // Note: this local token shadows the member dtokp; a fresh token is used per record.
        boost::intrusive_ptr<DataTokenImpl> dtokp(new DataTokenImpl);
        // Extra reference beyond the intrusive_ptr's own.
        // NOTE(review): presumably released by the journal's completion handling - confirm.
        dtokp->addRef();
        dtokp->set_external_rid(true);
        dtokp->set_rid(loggedtx->next());
        try {
            if (commit) {
                jc->txn_commit(dtokp.get(), getXid());
                sync();
            } else {
                jc->txn_abort(dtokp.get(), getXid());
            }
        } catch (const qpid::linearstore::journal::jexception& e) {
            std::ostringstream oss;
            oss << "Error during " << (commit ? "commit" : "abort") << ": " << e.what();
            THROW_STORE_EXCEPTION(oss.str());
        }
    }
}

// static
sys::uuid_t TxnCtxt::uuid;

// static
IdSequence TxnCtxt::uuidSeq;

// static
// Initialised by calling setUuid(), so the uuid is generated during static initialisation.
bool TxnCtxt::staticInit = TxnCtxt::setUuid();

// static
// Generate the class-wide uuid; always returns true.
bool TxnCtxt::setUuid() {
    qpid::sys::uuid_generate(uuid);
    return true;
}

// Construct a context with a locally generated transaction id.
// The tid is only generated when a loggedtx sequence is supplied.
TxnCtxt::TxnCtxt(IdSequence* _loggedtx) : loggedtx(_loggedtx), dtokp(new DataTokenImpl), preparedXidStorePtr(0), txn(0) {
    if (loggedtx) {
//        // Human-readable tid: 53 bytes
//        // uuid_t is a char[16]
//        tid.reserve(53);
//        uint64_t* u1 = (uint64_t*)uuid;
//        uint64_t* u2 = (uint64_t*)(uuid + sizeof(uint64_t));
//        std::stringstream s;
//        s << "tid:" << std::hex << std::setfill('0') << std::setw(16) << uuidSeq.next() << ":" << std::setw(16) << *u1 << std::setw(16) << *u2;
//        tid.assign(s.str());

        // Binary tid: 24 bytes
        // Raw bytes of the next uuidSeq value followed by the raw bytes of the class-wide uuid.
        tid.reserve(24);
        uint64_t c = uuidSeq.next();
        tid.append((char*)&c, sizeof(c));
        tid.append((char*)&uuid, sizeof(uuid));
    }
}

// Construct a context that uses the supplied transaction id.
TxnCtxt::TxnCtxt(std::string _tid, IdSequence* _loggedtx) : loggedtx(_loggedtx), dtokp(new DataTokenImpl), preparedXidStorePtr(0), tid(_tid), txn(0) {}

// Aborts the database transaction if one is still open (see abort()).
TxnCtxt::~TxnCtxt() { abort(); }

// If a loggedtx sequence was given: flush every impacted queue journal and the
// prepared-xid store journal (if set), then wait on each of them in the same
// order via jrnl_sync(). A journal jexception is rethrown as a StoreException.
void TxnCtxt::sync() {
    if (loggedtx) {
        try {
            for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++)
                jrnl_flush(static_cast<JournalImpl*>(*i));
            if (preparedXidStorePtr)
                jrnl_flush(preparedXidStorePtr);
            for (ipqItr i = impactedQueues.begin(); i != impactedQueues.end(); i++)
                jrnl_sync(static_cast<JournalImpl*>(*i), &qpid::linearstore::journal::jcntl::_aio_cmpl_timeout);
            if (preparedXidStorePtr)
                jrnl_sync(preparedXidStorePtr, &qpid::linearstore::journal::jcntl::_aio_cmpl_timeout);
        } catch (const qpid::linearstore::journal::jexception& e) {
            THROW_STORE_EXCEPTION(std::string("Error during txn sync: ") + e.what());
        }
    }
}

// Flush journal jc unless it is null or already reports this xid as synced.
void TxnCtxt::jrnl_flush(JournalImpl* jc) {
    if (jc && !(jc->is_txn_synced(getXid())))
        jc->flush(false);
}

// Unless jc is null or already reports this xid as synced, keep calling
// get_wr_events(timeout) while get_wr_aio_evt_rem() is non-zero. Throws a
// StoreException if get_wr_events() returns AIO_TIMEOUT and a timeout was supplied.
void TxnCtxt::jrnl_sync(JournalImpl* jc, timespec* timeout) {
    if (!jc || jc->is_txn_synced(getXid()))
        return;
    while (jc->get_wr_aio_evt_rem()) {
        if (jc->get_wr_events(timeout) == qpid::linearstore::journal::jerrno::AIO_TIMEOUT && timeout)
            THROW_STORE_EXCEPTION(std::string("Error: timeout waiting for TxnCtxt::jrnl_sync()"));
    }
}

// Begin a database transaction on env. On a DbException txn is reset to 0 and
// the exception is rethrown; a non-zero return code raises a StoreException.
// If sync is true, the globalSerialiser mutex is locked and held in
// globalHolder until commit() or abort() releases it.
void TxnCtxt::begin(DbEnv* env, bool sync) {
    int err;
    try { err = env->txn_begin(0, &txn, 0); }
    catch (const DbException&) { txn = 0; throw; }
    if (err != 0) {
        std::ostringstream oss;
        oss << "Error: Env::txn_begin() returned error code: " << err;
        THROW_STORE_EXCEPTION(oss.str());
    }
    if (sync)
        globalHolder = AutoScopedLock(new qpid::sys::Mutex::ScopedLock(globalSerialiser));
}

// Commit the open database transaction, if any, and release the global lock holder.
void TxnCtxt::commit() {
    if (txn) {
        txn->commit(0);
        txn = 0;
        globalHolder.reset();
    }
}

// Abort the open database transaction, if any, and release the global lock holder.
void TxnCtxt::abort(){
    if (txn) {
        txn->abort();
        txn = 0;
        globalHolder.reset();
    }
}

// Current database transaction handle, or 0 if none is open.
DbTxn* TxnCtxt::get() { return txn; }

bool TxnCtxt::isTPC() { return false; }

const std::string& TxnCtxt::getXid() { return tid; }

// Remember queue as impacted by this transaction (see completeTxn() and sync()).
void TxnCtxt::addXidRecord(qpid::broker::ExternalQueueStore* queue) { impactedQueues.insert(queue); }

void TxnCtxt::complete(bool commit) { completeTxn(commit); }

bool TxnCtxt::impactedQueuesEmpty() { return impactedQueues.empty(); }

DataTokenImpl* TxnCtxt::getDtok() { return dtokp.get(); }

void TxnCtxt::incrDtokRef() { dtokp->addRef(); }

// Set the member data token's rid and xid, mark its write state as ENQ and
// flag the rid as external.
void TxnCtxt::recoverDtok(const uint64_t rid, const std::string xid) {
    dtokp->set_rid(rid);
    dtokp->set_wstate(DataTokenImpl::ENQ);
    dtokp->set_xid(xid);
    dtokp->set_external_rid(true);
}

// The base class is constructed with the one-argument constructor, so it also
// generates a local tid when _loggedtx is non-null; getXid() is overridden in
// the header to return xid instead.
TPCTxnCtxt::TPCTxnCtxt(const std::string& _xid, IdSequence* _loggedtx) : TxnCtxt(_loggedtx), xid(_xid) {}

}}
diff --git a/qpid/cpp/src/qpid/linearstore/TxnCtxt.cpp b/qpid/cpp/src/qpid/linearstore/TxnCtxt.h new file mode 100644 index 0000000000..764063a615 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/TxnCtxt.h @@ -0,0 +1,115 @@
/*
 *
 * Licensed to the
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_TXNCTXT_H +#define QPID_LINEARSTORE_TXNCTXT_H + +#include <boost/intrusive_ptr.hpp> +#include "qpid/broker/TransactionalStore.h" +#include "qpid/linearstore/IdSequence.h" +#include "qpid/sys/uuid.h" + +class DbEnv; +class DbTxn; + +namespace qpid { +namespace broker { + class ExternalQueueStore; +} +namespace linearstore{ + class DataTokenImpl; + class JournalImpl; + +class TxnCtxt : public qpid::broker::TransactionContext +{ + protected: + static qpid::sys::Mutex globalSerialiser; + + static sys::uuid_t uuid; + static IdSequence uuidSeq; + static bool staticInit; + static bool setUuid(); + + typedef std::set<qpid::broker::ExternalQueueStore*> ipqdef; + typedef ipqdef::iterator ipqItr; + typedef std::auto_ptr<qpid::sys::Mutex::ScopedLock> AutoScopedLock; + + ipqdef impactedQueues; // list of Queues used in the txn + IdSequence* loggedtx; + boost::intrusive_ptr<DataTokenImpl> dtokp; + AutoScopedLock globalHolder; + JournalImpl* preparedXidStorePtr; + + /** + * local txn id, if non XA. 
+ */ + std::string tid; + DbTxn* txn; + + virtual void completeTxn(bool commit); + void commitTxn(JournalImpl* jc, bool commit); + void jrnl_flush(JournalImpl* jc); + void jrnl_sync(JournalImpl* jc, timespec* timeout); + + public: + TxnCtxt(IdSequence* _loggedtx=NULL); + TxnCtxt(std::string _tid, IdSequence* _loggedtx); + virtual ~TxnCtxt(); + + /** + * Call to make sure all the data for this txn is written to safe store + * + *@return if the data successfully synced. + */ + void sync(); + void begin(DbEnv* env, bool sync = false); + void commit(); + void abort(); + DbTxn* get(); + virtual bool isTPC(); + virtual const std::string& getXid(); + + void addXidRecord(qpid::broker::ExternalQueueStore* queue); + inline void prepare(JournalImpl* _preparedXidStorePtr) { preparedXidStorePtr = _preparedXidStorePtr; } + void complete(bool commit); + bool impactedQueuesEmpty(); + DataTokenImpl* getDtok(); + void incrDtokRef(); + void recoverDtok(const uint64_t rid, const std::string xid); +}; + + +class TPCTxnCtxt : public TxnCtxt, public qpid::broker::TPCTransactionContext +{ + protected: + const std::string xid; + + public: + TPCTxnCtxt(const std::string& _xid, IdSequence* _loggedtx); + inline virtual bool isTPC() { return true; } + inline virtual const std::string& getXid() { return xid; } +}; + +}} + +#endif // ifndef QPID_LINEARSTORE_TXNCTXT_H + + diff --git a/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h b/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h new file mode 100644 index 0000000000..73e5fecf93 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/AtomicCounter.h @@ -0,0 +1,133 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_ +#define QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_ + +#include "qpid/linearstore/journal/slock.h" +#include <string> + +namespace qpid { +namespace linearstore { +namespace journal { + +template <class T> +class AtomicCounter +{ +private: + std::string id_; + T count_; + mutable smutex countMutex; + +public: + AtomicCounter(const std::string& id, const T& initValue) : id_(id), count_(initValue) {} + + virtual ~AtomicCounter() {} + + T get() const { + slock l(countMutex); + return count_; + } + + void set(const T v) { + slock l(countMutex); + count_ = v; + } + + T increment() { + slock l(countMutex); + return ++count_; + } + + T add(const T& a) { + slock l(countMutex); + count_ += a; + return count_; + } + + T addLimit(const T& a, const T& limit, const uint32_t jerr) { + slock l(countMutex); + if (count_ + a > limit) throw jexception(jerr, id_, "AtomicCounter", "addLimit"); + count_ += a; + return count_; + } + + T decrement() { + slock l(countMutex); + return --count_; + } + + T decrementLimit(const T& limit = T(0), const uint32_t jerr = jerrno::JERR__UNDERFLOW) { + slock l(countMutex); + if (count_ < limit + 1) { + throw jexception(jerr, id_, "AtomicCounter", "decrementLimit"); + } + return --count_; + } + + T subtract(const T& s) { + slock l(countMutex); + count_ -= s; + return count_; + } + + T subtractLimit(const T& s, 
const T& limit = T(0), const uint32_t jerr = jerrno::JERR__UNDERFLOW) { + slock l(countMutex); + if (count_ < limit + s) throw jexception(jerr, id_, "AtomicCounter", "subtractLimit"); + count_ -= s; + return count_; + } + + bool operator==(const T& o) const { + slock l(countMutex); + return count_ == o; + } + + bool operator<(const T& o) const { + slock l(countMutex); + return count_ < o; + } + + bool operator<=(const T& o) const { + slock l(countMutex); + return count_ <= o; + } + + friend T operator-(const T& a, const AtomicCounter& b) { + slock l(b.countMutex); + return a - b.count_; + } + + friend T operator-(const AtomicCounter& a, const T& b) { + slock l(a.countMutex); + return a.count_ - b; + } + + friend T operator-(const AtomicCounter&a, const AtomicCounter& b) { + slock l1(a.countMutex); + slock l2(b.countMutex); + return a.count_ - b.count_; + } +}; + +}}} // namespace qpid::qls_jrnl + +#endif // QPID_LINEARSTORE_JOURNAL_ATOMICCOUNTER_H_ diff --git a/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp b/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp new file mode 100644 index 0000000000..eaede12d8e --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/Checksum.cpp @@ -0,0 +1,45 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/Checksum.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +Checksum::Checksum() : a(1UL), b(0UL), MOD_ADLER(65521UL) {} + +Checksum::~Checksum() {} + +void Checksum::addData(const unsigned char* data, const std::size_t len) { + if (data) { + for (uint32_t i = 0; i < len; i++) { + a = (a + data[i]) % MOD_ADLER; + b = (a + b) % MOD_ADLER; + } + } +} + +uint32_t Checksum::getChecksum() { + return (b << 16) | a; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/Checksum.h b/qpid/cpp/src/qpid/linearstore/journal/Checksum.h new file mode 100644 index 0000000000..d96aac2991 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/Checksum.h @@ -0,0 +1,54 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_ +#define QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_ + +#include <cstddef> +#include <stdint.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +/* + * This checksum routine uses the Adler-32 algorithm as described in + * http://en.wikipedia.org/wiki/Adler-32. 
It is structured so that the + * data for which the checksum must be calculated can be added in several + * stages through the addData() function, and when complete, the checksum + * is obtained through a call to getChecksum(). + */ +class Checksum +{ +private: + uint32_t a; + uint32_t b; + const uint32_t MOD_ADLER; +public: + Checksum(); + virtual ~Checksum(); + void addData(const unsigned char* data, const std::size_t len); + uint32_t getChecksum(); +}; + +}}} + +#endif // QPID_LINEARSTORE_JOURNAL_CHECKSUM_H_ diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp new file mode 100644 index 0000000000..08db3f75bd --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.cpp @@ -0,0 +1,477 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "EmptyFilePool.h" + +#include <fstream> +#include "qpid/linearstore/journal/EmptyFilePoolPartition.h" +#include "qpid/linearstore/journal/jcfg.h" +#include "qpid/linearstore/journal/jdir.h" +#include "qpid/linearstore/journal/JournalLog.h" +#include "qpid/linearstore/journal/slock.h" +#include "qpid/linearstore/journal/utils/file_hdr.h" +#include "qpid/types/Uuid.h" +#include <sys/stat.h> +#include <unistd.h> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +// static +std::string EmptyFilePool::s_inuseFileDirectory_ = "in_use"; + +// static +std::string EmptyFilePool::s_returnedFileDirectory_ = "returned"; + +EmptyFilePool::EmptyFilePool(const std::string& efpDirectory, + const EmptyFilePoolPartition* partitionPtr, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef) : + efpDirectory_(efpDirectory), + efpDataSize_kib_(dataSizeFromDirName_kib(efpDirectory, partitionPtr->getPartitionNumber())), + partitionPtr_(partitionPtr), + overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag), + truncateFlag_(truncateFlag), + journalLogRef_(journalLogRef) +{} + +EmptyFilePool::~EmptyFilePool() {} + +void EmptyFilePool::initialize() { + if (::mkdir(efpDirectory_.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { // Create EFP dir if it does not yet exist + if (errno != EEXIST) { + std::ostringstream oss; + oss << "directory=" << efpDirectory_ << " " << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_EFP_MKDIR, oss.str(), "EmptyFilePool", "initialize"); + } + } + + // Process empty files in main dir + std::vector<std::string> dirList; + jdir::read_dir(efpDirectory_, dirList, false, true, false, false); + for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) { + size_t dotPos = i->rfind("."); + if (dotPos != std::string::npos) { + if (i->substr(dotPos).compare(".jrnl") == 0 && i->length() == 41) { + std::string emptyFileName(efpDirectory_ + "/" 
+ (*i)); + if (validateEmptyFile(emptyFileName)) { + pushEmptyFile(emptyFileName); + } + } + } + } + + // Create 'in_use' and 'returned' subdirs if they don't already exist + // Retern files to EFP in 'in_use' and 'returned' subdirs if they do exist + initializeSubDirectory(efpDirectory_ + "/" + s_inuseFileDirectory_); + initializeSubDirectory(efpDirectory_ + "/" + s_returnedFileDirectory_); +} + +efpDataSize_kib_t EmptyFilePool::dataSize_kib() const { + return efpDataSize_kib_; +} + +efpFileSize_kib_t EmptyFilePool::fileSize_kib() const { + return efpDataSize_kib_ + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB); +} + +efpDataSize_sblks_t EmptyFilePool::dataSize_sblks() const { + return efpDataSize_kib_ / QLS_SBLK_SIZE_KIB; +} + +efpFileSize_sblks_t EmptyFilePool::fileSize_sblks() const { + return (efpDataSize_kib_ / QLS_SBLK_SIZE_KIB) + QLS_JRNL_FHDR_RES_SIZE_SBLKS; +} + +efpFileCount_t EmptyFilePool::numEmptyFiles() const { + slock l(emptyFileListMutex_); + return efpFileCount_t(emptyFileList_.size()); +} + +efpDataSize_kib_t EmptyFilePool::cumFileSize_kib() const { + slock l(emptyFileListMutex_); + return efpDataSize_kib_t(emptyFileList_.size()) * efpDataSize_kib_; +} + +efpPartitionNumber_t EmptyFilePool::getPartitionNumber() const { + return partitionPtr_->getPartitionNumber(); +} + +const EmptyFilePoolPartition* EmptyFilePool::getPartition() const { + return partitionPtr_; +} + +const efpIdentity_t EmptyFilePool::getIdentity() const { + return efpIdentity_t(partitionPtr_->getPartitionNumber(), efpDataSize_kib_); +} + +std::string EmptyFilePool::takeEmptyFile(const std::string& destDirectory) { + std::string emptyFileName = popEmptyFile(); + std::string newFileName = efpDirectory_ + "/" + s_inuseFileDirectory_ + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/' + std::string symlinkName = destDirectory + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/' + if 
(!moveFile(emptyFileName, newFileName)) { + // Try again with new UUID for file name + newFileName = efpDirectory_ + "/" + s_inuseFileDirectory_ + "/" + getEfpFileName(); + if (!moveFile(emptyFileName, newFileName)) { +//std::cerr << "*** DEBUG: pushEmptyFile " << emptyFileName << "from EmptyFilePool::takeEmptyFile()" << std::endl; // DEBUG + pushEmptyFile(emptyFileName); // Return empty file to pool + std::ostringstream oss; + oss << "file=\"" << emptyFileName << "\" dest=\"" << newFileName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "EmptyFilePool", "takeEmptyFile"); + } + } + if (createSymLink(newFileName, symlinkName)) { + std::ostringstream oss; + oss << "file=\"" << emptyFileName << "\" dest=\"" << newFileName << "\" symlink=\"" << symlinkName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "takeEmptyFile"); + } + return symlinkName; +} + +void EmptyFilePool::returnEmptyFileSymlink(const std::string& emptyFileSymlink) { + if (isFile(emptyFileSymlink)) { + returnEmptyFile(emptyFileSymlink); + } else if(isSymlink(emptyFileSymlink)) { + returnEmptyFile(deleteSymlink(emptyFileSymlink)); + } else { + std::ostringstream oss; + oss << "File \"" << emptyFileSymlink << "\" is neither a file nor a symlink"; + throw jexception(jerrno::JERR_EFP_BADFILETYPE, oss.str(), "EmptyFilePool", "returnEmptyFileSymlink"); + } +} + +//static +std::string EmptyFilePool::dirNameFromDataSize(const efpDataSize_kib_t efpDataSize_kib) { + std::ostringstream oss; + oss << efpDataSize_kib << "k"; + return oss.str(); +} + + +// static +efpDataSize_kib_t EmptyFilePool::dataSizeFromDirName_kib(const std::string& dirName, + const efpPartitionNumber_t partitionNumber) { + // Check for dirName format 'NNNk', where NNN is a number, convert NNN into an integer. NNN cannot be 0. 
+ std::string n(dirName.substr(dirName.rfind('/')+1)); + bool valid = true; + for (uint16_t charNum = 0; charNum < n.length(); ++charNum) { + if (charNum < n.length()-1) { + if (!::isdigit((int)n[charNum])) { + valid = false; + break; + } + } else { + valid = n[charNum] == 'k'; + } + } + efpDataSize_kib_t s = ::atol(n.c_str()); + if (!valid || s == 0 || s % QLS_SBLK_SIZE_KIB != 0) { + std::ostringstream oss; + oss << "Partition: " << partitionNumber << "; EFP directory: \'" << n << "\'"; + throw jexception(jerrno::JERR_EFP_BADEFPDIRNAME, oss.str(), "EmptyFilePool", "fileSizeKbFromDirName"); + } + return s; +} + +// --- protected functions --- +void EmptyFilePool::checkIosState(std::ofstream& ofs, + const uint32_t jerrno, + const std::string& fqFileName, + const std::string& operation, + const std::string& errorMessage, + const std::string& className, + const std::string& fnName) { + if (!ofs.good()) { + if (ofs.is_open()) { + ofs.close(); + } + std::ostringstream oss; + oss << "IO failure: eofbit=" << (ofs.eof()?"T":"F") << " failbit=" << (ofs.fail()?"T":"F") << " badbit=" + << (ofs.bad()?"T":"F") << " file=" << fqFileName << " operation=" << operation << ": " << errorMessage; + throw jexception(jerrno, oss.str(), className, fnName); + } +} + +std::string EmptyFilePool::createEmptyFile() { + std::string efpfn = getEfpFileName(); + overwriteFileContents(efpfn); + return efpfn; +} + +std::string EmptyFilePool::getEfpFileName() { + qpid::types::Uuid uuid(true); + std::ostringstream oss; + oss << efpDirectory_ << "/" << uuid << QLS_JRNL_FILE_EXTENSION; + return oss.str(); +} + +void EmptyFilePool::initializeSubDirectory(const std::string& fqDirName) { + std::vector<std::string> dirList; + if (jdir::exists(fqDirName)) { + if (truncateFlag_) { + jdir::read_dir(fqDirName, dirList, false, true, false, false); + for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) { + size_t dotPos = i->rfind("."); + if (i->substr(dotPos).compare(".jrnl") == 
0 && i->length() == 41) { + returnEmptyFile(fqDirName + "/" + (*i)); + } else { + std::ostringstream oss; + oss << "File \'" << *i << "\' was not a journal file and was not returned to EFP."; + journalLogRef_.log(JournalLog::LOG_WARN, oss.str()); + } + } + } + } else { + jdir::create_dir(fqDirName); + } +} + +void EmptyFilePool::overwriteFileContents(const std::string& fqFileName) { + ::file_hdr_t fh; + ::file_hdr_create(&fh, QLS_FILE_MAGIC, QLS_JRNL_VERSION, QLS_JRNL_FHDR_RES_SIZE_SBLKS, partitionPtr_->getPartitionNumber(), efpDataSize_kib_); + std::ofstream ofs(fqFileName.c_str(), std::ofstream::out | std::ofstream::binary); + checkIosState(ofs, jerrno::JERR_EFP_FOPEN, fqFileName, "constructor", "Failed to create file", "EmptyFilePool", "overwriteFileContents"); + ofs.write((char*)&fh, sizeof(::file_hdr_t)); + checkIosState(ofs, jerrno::JERR_EFP_FWRITE, fqFileName, "write()", "Failed to write header", "EmptyFilePool", "overwriteFileContents"); + uint64_t rem = ((efpDataSize_kib_ + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB)) * 1024) - sizeof(::file_hdr_t); + while (rem--) { + ofs.put('\0'); + checkIosState(ofs, jerrno::JERR_EFP_FWRITE, fqFileName, "put()", "Failed to put \0", "EmptyFilePool", "overwriteFileContents"); + } + ofs.close(); +//std::cout << "*** WARNING: EFP " << efpDirectory_ << " is empty - created new journal file " << fqFileName.substr(fqFileName.rfind('/') + 1) << " on the fly" << std::endl; // DEBUG +} + +std::string EmptyFilePool::popEmptyFile() { + std::string emptyFileName; + bool listEmptyFlag; + { + slock l(emptyFileListMutex_); + listEmptyFlag = emptyFileList_.empty(); + if (!listEmptyFlag) { + emptyFileName = emptyFileList_.front(); + emptyFileList_.pop_front(); + } + } + // If the list is empty, create a new file and return the file name. 
+ if (listEmptyFlag) { + emptyFileName = createEmptyFile(); + } + return emptyFileName; +} + +void EmptyFilePool::pushEmptyFile(const std::string fqFileName) { + slock l(emptyFileListMutex_); + emptyFileList_.push_back(fqFileName); +} + +void EmptyFilePool::returnEmptyFile(const std::string& emptyFileName) { + std::string returnedFileName = efpDirectory_ + "/" + s_returnedFileDirectory_ + emptyFileName.substr(emptyFileName.rfind('/')); // NOTE: substr() includes leading '/' + if (!moveFile(emptyFileName, returnedFileName)) { + ::unlink(emptyFileName.c_str()); +//std::cerr << "*** WARNING: Unable to move file " << emptyFileName << " to " << returnedFileName << "; deleted." << std::endl; // DEBUG + } + + // TODO: On a separate thread, process returned files by overwriting headers and, optionally, their contents and + // returning them to the EFP directory + resetEmptyFileHeader(returnedFileName); + if (overwriteBeforeReturnFlag_) { + overwriteFileContents(returnedFileName); + } + std::string sanitizedEmptyFileName = efpDirectory_ + returnedFileName.substr(returnedFileName.rfind('/')); // NOTE: substr() includes leading '/' + if (!moveFile(returnedFileName, sanitizedEmptyFileName)) { + ::unlink(returnedFileName.c_str()); +//std::cerr << "*** WARNING: Unable to move file " << returnedFileName << " to " << sanitizedEmptyFileName << "; deleted." 
<< std::endl; // DEBUG + } else { + pushEmptyFile(sanitizedEmptyFileName); + } +} + +void EmptyFilePool::resetEmptyFileHeader(const std::string& fqFileName) { + std::fstream fs(fqFileName.c_str(), std::fstream::in | std::fstream::out | std::fstream::binary); + if (fs.good()) { + const std::streamsize buffsize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES; + char buff[buffsize]; + fs.read((char*)buff, buffsize); + std::streampos bytesRead = fs.tellg(); + if (std::streamoff(bytesRead) == buffsize) { + ::file_hdr_reset((::file_hdr_t*)buff); + // set rest of buffer to 0 + ::memset(buff + sizeof(::file_hdr_t), 0, MAX_FILE_HDR_LEN - sizeof(::file_hdr_t)); + fs.seekp(0, std::fstream::beg); + fs.write(buff, buffsize); + std::streampos bytesWritten = fs.tellp(); + if (std::streamoff(bytesWritten) != buffsize) { +//std::cerr << "*** ERROR: Unable to write file header of file \"" << fqFileName << "\": tried to write " << buffsize << " bytes; wrote " << bytesWritten << " bytes." << std::endl; // DEBUG + } + } else { +//std::cerr << "*** ERROR: Unable to read file header of file \"" << fqFileName << "\": tried to read " << sizeof(::file_hdr_t) << " bytes; read " << bytesRead << " bytes." 
<< std::endl; // DEBUG + } + fs.close(); + } else { +//std::cerr << "*** ERROR: Unable to open file \"" << fqFileName << "\" for reading" << std::endl; // DEBUG + } +} + +bool EmptyFilePool::validateEmptyFile(const std::string& emptyFileName) const { + std::ostringstream oss; + struct stat s; + if (::stat(emptyFileName.c_str(), &s)) + { + oss << "stat: file=\"" << emptyFileName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "EmptyFilePool", "validateEmptyFile"); + } + + // Size matches pool + efpDataSize_kib_t expectedSize = (QLS_SBLK_SIZE_KIB + efpDataSize_kib_) * 1024; + if ((efpDataSize_kib_t)s.st_size != expectedSize) { + oss << "ERROR: File " << emptyFileName << ": Incorrect size: Expected=" << expectedSize + << "; actual=" << s.st_size; + journalLogRef_.log(JournalLog::LOG_ERROR, oss.str()); + return false; + } + + // Open file and read header + std::fstream fs(emptyFileName.c_str(), std::fstream::in | std::fstream::out | std::fstream::binary); + if (!fs) { + oss << "ERROR: File " << emptyFileName << ": Unable to open for reading"; + journalLogRef_.log(JournalLog::LOG_ERROR, oss.str()); + return false; + } + const std::streamsize buffsize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES; + char buff[buffsize]; + fs.read((char*)buff, buffsize); + std::streampos bytesRead = fs.tellg(); + if (std::streamoff(bytesRead) != buffsize) { + oss << "ERROR: Unable to read file header of file \"" << emptyFileName << "\": tried to read " + << buffsize << " bytes; read " << bytesRead << " bytes"; + journalLogRef_.log(JournalLog::LOG_ERROR, oss.str()); + fs.close(); + return false; + } + + // Check file header + ::file_hdr_t* fhp = (::file_hdr_t*)buff; + const bool jrnlMagicError = fhp->_rhdr._magic != QLS_FILE_MAGIC; + const bool jrnlVersionError = fhp->_rhdr._version != QLS_JRNL_VERSION; + const bool jrnlPartitionError = fhp->_efp_partition != partitionPtr_->getPartitionNumber(); + const bool jrnlFileSizeError = 
fhp->_data_size_kib != efpDataSize_kib_; + if (jrnlMagicError || jrnlVersionError || jrnlPartitionError || jrnlFileSizeError) + { + oss << "ERROR: File " << emptyFileName << ": Invalid file header - mismatched header fields: " << + (jrnlMagicError ? "magic " : "") << + (jrnlVersionError ? "version " : "") << + (jrnlPartitionError ? "partition" : "") << + (jrnlFileSizeError ? "file-size" : ""); + journalLogRef_.log(JournalLog::LOG_ERROR, oss.str()); + fs.close(); + return false; + } + + // Check file header is reset + if (!::is_file_hdr_reset(fhp)) { + ::file_hdr_reset(fhp); + ::memset(buff + sizeof(::file_hdr_t), 0, MAX_FILE_HDR_LEN - sizeof(::file_hdr_t)); // set rest of buffer to 0 + fs.seekp(0, std::fstream::beg); + fs.write(buff, buffsize); + std::streampos bytesWritten = fs.tellp(); + if (std::streamoff(bytesWritten) != buffsize) { + oss << "ERROR: Unable to write file header of file \"" << emptyFileName << "\": tried to write " + << buffsize << " bytes; wrote " << bytesWritten << " bytes"; + journalLogRef_.log(JournalLog::LOG_ERROR, oss.str()); + fs.close(); + return false; + } + oss << "WARNING: File " << emptyFileName << ": File header not reset"; + journalLogRef_.log(JournalLog::LOG_WARN, oss.str()); + } + + // Close file + fs.close(); + return true; +} + +//static +int EmptyFilePool::createSymLink(const std::string& fqFileName, + const std::string& fqLinkName) { + if(::symlink(fqFileName.c_str(), fqLinkName.c_str())) { + if (errno == EEXIST) return errno; // File name exists + std::ostringstream oss; + oss << "file=\"" << fqFileName << "\" symlink=\"" << fqLinkName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "createSymLink"); + } + return 0; +} + +//static +std::string EmptyFilePool::deleteSymlink(const std::string& fqLinkName) { + char buff[1024]; + ssize_t len = ::readlink(fqLinkName.c_str(), buff, 1024); + if (len < 0) { + std::ostringstream oss; + oss << "symlink=\"" << fqLinkName << "\"" << 
FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__SYMLINK, oss.str(), "EmptyFilePool", "deleteSymlink"); + } + ::unlink(fqLinkName.c_str()); + return std::string(buff, len); +} + +//static +bool EmptyFilePool::isFile(const std::string& fqName) { + struct stat buff; + if (::lstat(fqName.c_str(), &buff)) { + std::ostringstream oss; + oss << "lstat file=\"" << fqName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_EFP_LSTAT, oss.str(), "EmptyFilePool", "isFile"); + } + return S_ISREG(buff.st_mode); +} + +//static +bool EmptyFilePool::isSymlink(const std::string& fqName) { + struct stat buff; + if (::lstat(fqName.c_str(), &buff)) { + std::ostringstream oss; + oss << "lstat file=\"" << fqName << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_EFP_LSTAT, oss.str(), "EmptyFilePool", "isSymlink"); + } + return S_ISLNK(buff.st_mode); + +} + +// static +bool EmptyFilePool::moveFile(const std::string& from, + const std::string& to) { + if (::rename(from.c_str(), to.c_str())) { + if (errno == EEXIST) { + return false; // File name exists + } + std::ostringstream oss; + oss << "file=\"" << from << "\" dest=\"" << to << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "EmptyFilePool", "returnEmptyFile"); + } + return true; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h new file mode 100644 index 0000000000..dc567ff917 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePool.h @@ -0,0 +1,118 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_ +#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_ + +namespace qpid { +namespace linearstore { +namespace journal { + class EmptyFilePool; +}}} + +#include <deque> +#include "qpid/linearstore/journal/EmptyFilePoolTypes.h" +#include "qpid/linearstore/journal/smutex.h" + +namespace qpid { +namespace linearstore { +namespace journal { +class EmptyFilePoolPartition; +class jdir; +class JournalFile; +class JournalLog; + +class EmptyFilePool +{ +protected: + typedef std::deque<std::string> emptyFileList_t; + typedef emptyFileList_t::const_iterator emptyFileListConstItr_t; + + static std::string s_inuseFileDirectory_; + static std::string s_returnedFileDirectory_; + + const std::string efpDirectory_; + const efpDataSize_kib_t efpDataSize_kib_; + const EmptyFilePoolPartition* partitionPtr_; + const bool overwriteBeforeReturnFlag_; + const bool truncateFlag_; + JournalLog& journalLogRef_; + +private: + emptyFileList_t emptyFileList_; + smutex emptyFileListMutex_; + +public: + EmptyFilePool(const std::string& efpDirectory, + const EmptyFilePoolPartition* partitionPtr, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef); + virtual ~EmptyFilePool(); + + void initialize(); + efpDataSize_kib_t dataSize_kib() const; + efpFileSize_kib_t fileSize_kib() const; + efpDataSize_sblks_t dataSize_sblks() const; + efpFileSize_sblks_t fileSize_sblks() const; + efpFileCount_t numEmptyFiles() const; + efpDataSize_kib_t cumFileSize_kib() const; + efpPartitionNumber_t 
getPartitionNumber() const; + const EmptyFilePoolPartition* getPartition() const; + const efpIdentity_t getIdentity() const; + + std::string takeEmptyFile(const std::string& destDirectory); + void returnEmptyFileSymlink(const std::string& emptyFileSymlink); + + static std::string dirNameFromDataSize(const efpDataSize_kib_t efpDataSize_kib); + static efpDataSize_kib_t dataSizeFromDirName_kib(const std::string& dirName, + const efpPartitionNumber_t partitionNumber); + +protected: + void checkIosState(std::ofstream& ofs, + const uint32_t jerrno, + const std::string& fqFileName, + const std::string& operation, + const std::string& errorMessage, + const std::string& className, + const std::string& fnName); + std::string createEmptyFile(); + std::string getEfpFileName(); + void initializeSubDirectory(const std::string& fqDirName); + void overwriteFileContents(const std::string& fqFileName); + std::string popEmptyFile(); + void pushEmptyFile(const std::string fqFileName); + void returnEmptyFile(const std::string& emptyFileName); + void resetEmptyFileHeader(const std::string& fqFileName); + bool validateEmptyFile(const std::string& emptyFileName) const; + + static int createSymLink(const std::string& fqFileName, + const std::string& fqLinkName); + static std::string deleteSymlink(const std::string& fqLinkName); + static bool isFile(const std::string& fqName); + static bool isSymlink(const std::string& fqName); + static bool moveFile(const std::string& fromFqPath, + const std::string& toFqPath); +}; + +}}} + +#endif /* QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOL_H_ */ diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp new file mode 100644 index 0000000000..a02679736e --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.cpp @@ -0,0 +1,211 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "EmptyFilePoolManager.h" + +#include "qpid/linearstore/journal/EmptyFilePool.h" +#include "qpid/linearstore/journal/EmptyFilePoolPartition.h" +#include "qpid/linearstore/journal/jdir.h" +#include "qpid/linearstore/journal/JournalLog.h" +#include "qpid/linearstore/journal/slock.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +EmptyFilePoolManager::EmptyFilePoolManager(const std::string& qlsStorePath, + const efpPartitionNumber_t defaultPartitionNumber, + const efpDataSize_kib_t defaultEfpDataSize_kib, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef) : + qlsStorePath_(qlsStorePath), + defaultPartitionNumber_(defaultPartitionNumber), + defaultEfpDataSize_kib_(defaultEfpDataSize_kib), + overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag), + truncateFlag_(truncateFlag), + journalLogRef_(journalLogRef) +{} + +EmptyFilePoolManager::~EmptyFilePoolManager() { + slock l(partitionMapMutex_); + for (partitionMapItr_t i = partitionMap_.begin(); i != partitionMap_.end(); ++i) { + delete i->second; + } + partitionMap_.clear(); +} + +void EmptyFilePoolManager::findEfpPartitions() { +//std::cout << "*** Reading " << qlsStorePath_ << std::endl; // DEBUG + bool foundPartition = false; + 
std::vector<std::string> dirList; + while (!foundPartition) { + jdir::read_dir(qlsStorePath_, dirList, true, false, true, false); + for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) { + efpPartitionNumber_t pn = EmptyFilePoolPartition::getPartitionNumber(*i); + if (pn > 0) { // valid partition name found + std::string fullDirPath(qlsStorePath_ + "/" + (*i)); + EmptyFilePoolPartition* efppp = insertPartition(pn, fullDirPath); + if (efppp != 0) + efppp->findEmptyFilePools(); + foundPartition = true; + } + } + + // If no partition was found, create an empty default partition. + if (!foundPartition) { + std::ostringstream oss1; + oss1 << qlsStorePath_ << "/" << EmptyFilePoolPartition::getPartionDirectoryName(defaultPartitionNumber_) + << "/" << EmptyFilePoolPartition::s_efpTopLevelDir_ + << "/" << EmptyFilePool::dirNameFromDataSize(defaultEfpDataSize_kib_); + jdir::create_dir(oss1.str()); + insertPartition(defaultPartitionNumber_, oss1.str()); + std::ostringstream oss2; + oss2 << "No EFP partition found, creating an empty partition at " << oss1.str(); + journalLogRef_.log(JournalLog::LOG_INFO, oss2.str()); + } + } + + journalLogRef_.log(JournalLog::LOG_INFO, "EFP Manager initialization complete"); + std::vector<qpid::linearstore::journal::EmptyFilePoolPartition*> partitionList; + getEfpPartitions(partitionList); + if (partitionList.size() == 0) { + journalLogRef_.log(JournalLog::LOG_WARN, "NO EFP PARTITIONS FOUND! 
No queue creation is possible."); + } else { + std::stringstream oss; + oss << "EFP Partitions found: " << partitionList.size(); + journalLogRef_.log(JournalLog::LOG_INFO, oss.str()); + for (std::vector<qpid::linearstore::journal::EmptyFilePoolPartition*>::const_iterator i=partitionList.begin(); i!= partitionList.end(); ++i) { + journalLogRef_.log(JournalLog::LOG_INFO, (*i)->toString(5U)); + } + } +} + +void EmptyFilePoolManager::getEfpFileSizes(std::vector<efpDataSize_kib_t>& efpFileSizeList, + const efpPartitionNumber_t efpPartitionNumber) const { + if (efpPartitionNumber == 0) { + for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) { + i->second->getEmptyFilePoolSizes_kib(efpFileSizeList); + } + } else { + partitionMapConstItr_t i = partitionMap_.find(efpPartitionNumber); + if (i != partitionMap_.end()) { + i->second->getEmptyFilePoolSizes_kib(efpFileSizeList); + } + } +} + +EmptyFilePoolPartition* EmptyFilePoolManager::getEfpPartition(const efpPartitionNumber_t partitionNumber) { + partitionMapItr_t i = partitionMap_.find(partitionNumber); + if (i == partitionMap_.end()) + return 0; + else + return i->second; +} + +void EmptyFilePoolManager::getEfpPartitionNumbers(std::vector<efpPartitionNumber_t>& partitionNumberList, + const efpDataSize_kib_t efpDataSize_kib) const { + slock l(partitionMapMutex_); + for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) { + if (efpDataSize_kib == 0) { + partitionNumberList.push_back(i->first); + } else { + std::vector<efpDataSize_kib_t> efpFileSizeList; + i->second->getEmptyFilePoolSizes_kib(efpFileSizeList); + for (std::vector<efpDataSize_kib_t>::iterator j=efpFileSizeList.begin(); j!=efpFileSizeList.end(); ++j) { + if (*j == efpDataSize_kib) { + partitionNumberList.push_back(i->first); + break; + } + } + } + } +} + +void EmptyFilePoolManager::getEfpPartitions(std::vector<EmptyFilePoolPartition*>& partitionList, + const efpDataSize_kib_t efpDataSize_kib) { + slock 
l(partitionMapMutex_); + for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) { + if (efpDataSize_kib == 0) { + partitionList.push_back(i->second); + } else { + std::vector<efpDataSize_kib_t> efpFileSizeList; + i->second->getEmptyFilePoolSizes_kib(efpFileSizeList); + for (std::vector<efpDataSize_kib_t>::iterator j=efpFileSizeList.begin(); j!=efpFileSizeList.end(); ++j) { + if (*j == efpDataSize_kib) { + partitionList.push_back(i->second); + break; + } + } + } + } +} + +EmptyFilePool* EmptyFilePoolManager::getEmptyFilePool(const efpIdentity_t efpIdentity) { + return getEmptyFilePool(efpIdentity.pn_, efpIdentity.ds_); +} + +EmptyFilePool* EmptyFilePoolManager::getEmptyFilePool(const efpPartitionNumber_t partitionNumber, + const efpDataSize_kib_t efpDataSize_kib) { + EmptyFilePoolPartition* efppp = getEfpPartition(partitionNumber > 0 ? partitionNumber : defaultPartitionNumber_); + if (efppp == 0) { + return 0; + } + return efppp->getEmptyFilePool(efpDataSize_kib > 0 ? 
efpDataSize_kib : defaultEfpDataSize_kib_, true); +} + +void EmptyFilePoolManager::getEmptyFilePools(std::vector<EmptyFilePool*>& emptyFilePoolList, + const efpPartitionNumber_t efpPartitionNumber) { + if (efpPartitionNumber == 0) { + for (partitionMapConstItr_t i=partitionMap_.begin(); i!=partitionMap_.end(); ++i) { + i->second->getEmptyFilePools(emptyFilePoolList); + } + } else { + partitionMapConstItr_t i = partitionMap_.find(efpPartitionNumber); + if (i != partitionMap_.end()) { + i->second->getEmptyFilePools(emptyFilePoolList); + } + } +} + +uint16_t EmptyFilePoolManager::getNumEfpPartitions() const { + return partitionMap_.size(); +} + +EmptyFilePoolPartition* EmptyFilePoolManager::insertPartition(const efpPartitionNumber_t pn, const std::string& fullPartitionPath) { + EmptyFilePoolPartition* efppp = 0; + try { + efppp = new EmptyFilePoolPartition(pn, fullPartitionPath, overwriteBeforeReturnFlag_, truncateFlag_, journalLogRef_); + { + slock l(partitionMapMutex_); + partitionMap_[pn] = efppp; + } + } catch (const std::exception& e) { + if (efppp != 0) { + delete efppp; + efppp = 0; + } +//std::cerr << "*** Unable to initialize partition " << pn << " (\'" << fullPartitionPath << "\'): " << e.what() << std::endl; // DEBUG + } + return efppp; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h new file mode 100644 index 0000000000..d0aa7fa7d6 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolManager.h @@ -0,0 +1,83 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_ +#define QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_ + +#include <map> +#include "qpid/linearstore/journal/EmptyFilePoolTypes.h" +#include "qpid/linearstore/journal/smutex.h" +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +class EmptyFilePool; +class EmptyFilePoolPartition; +class JournalLog; + +class EmptyFilePoolManager +{ +protected: + typedef std::map<efpPartitionNumber_t, EmptyFilePoolPartition*> partitionMap_t; + typedef partitionMap_t::iterator partitionMapItr_t; + typedef partitionMap_t::const_iterator partitionMapConstItr_t; + + const std::string qlsStorePath_; + const efpPartitionNumber_t defaultPartitionNumber_; + const efpDataSize_kib_t defaultEfpDataSize_kib_; + const bool overwriteBeforeReturnFlag_; + const bool truncateFlag_; + JournalLog& journalLogRef_; + partitionMap_t partitionMap_; + smutex partitionMapMutex_; + +public: + EmptyFilePoolManager(const std::string& qlsStorePath_, + const efpPartitionNumber_t defaultPartitionNumber, + const efpDataSize_kib_t defaultEfpDataSize_kib, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef_); + virtual ~EmptyFilePoolManager(); + + void findEfpPartitions(); + void getEfpFileSizes(std::vector<efpDataSize_kib_t>& efpFileSizeList, + const efpPartitionNumber_t efpPartitionNumber = 0) const; + EmptyFilePoolPartition* getEfpPartition(const efpPartitionNumber_t partitionNumber); + void getEfpPartitionNumbers(std::vector<efpPartitionNumber_t>& partitionNumberList, + 
const efpDataSize_kib_t efpDataSize_kib = 0) const; + void getEfpPartitions(std::vector<EmptyFilePoolPartition*>& partitionList, + const efpDataSize_kib_t efpDataSize_kib = 0); + EmptyFilePool* getEmptyFilePool(const efpIdentity_t efpIdentity); + EmptyFilePool* getEmptyFilePool(const efpPartitionNumber_t partitionNumber, + const efpDataSize_kib_t efpDataSize_kib); + void getEmptyFilePools(std::vector<EmptyFilePool*>& emptyFilePoolList, + const efpPartitionNumber_t efpPartitionNumber = 0); + uint16_t getNumEfpPartitions() const; +protected: + EmptyFilePoolPartition* insertPartition(const efpPartitionNumber_t pn, const std::string& fullPartitionPath); +}; + +}}} + +#endif /* QPID_QLS_JRNL_EMPTYFILEPOOLMANAGER_H_ */ diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp new file mode 100644 index 0000000000..12d2db74b8 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.cpp @@ -0,0 +1,199 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/EmptyFilePoolPartition.h" + +#include <iomanip> +#include "qpid/linearstore/journal/EmptyFilePool.h" +#include "qpid/linearstore/journal/jdir.h" +#include "qpid/linearstore/journal/JournalLog.h" +#include "qpid/linearstore/journal/slock.h" +#include <unistd.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +// static +const std::string EmptyFilePoolPartition::s_efpTopLevelDir_("efp"); // Sets the top-level efp dir within a partition + +EmptyFilePoolPartition::EmptyFilePoolPartition(const efpPartitionNumber_t partitionNum, + const std::string& partitionDir, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef) : + partitionNum_(partitionNum), + partitionDir_(partitionDir), + overwriteBeforeReturnFlag_(overwriteBeforeReturnFlag), + truncateFlag_(truncateFlag), + journalLogRef_(journalLogRef) +{ + validatePartitionDir(); +} + +EmptyFilePoolPartition::~EmptyFilePoolPartition() { + slock l(efpMapMutex_); + for (efpMapItr_t i = efpMap_.begin(); i != efpMap_.end(); ++i) { + delete i->second; + } + efpMap_.clear(); +} + +void +EmptyFilePoolPartition::findEmptyFilePools() { +//std::cout << "*** EmptyFilePoolPartition::findEmptyFilePools(): Reading " << partitionDir_ << std::endl; // DEBUG + std::string efpDir(partitionDir_ + "/" + s_efpTopLevelDir_); + if (jdir::is_dir(efpDir)) { + std::vector<std::string> dirList; + jdir::read_dir(efpDir, dirList, true, false, false, true); + for (std::vector<std::string>::iterator i = dirList.begin(); i != dirList.end(); ++i) { + createEmptyFilePool(*i); + } + } else { + std::ostringstream oss; + oss << "Partition \"" << partitionDir_ << "\" does not contain top level EFP dir \"" << s_efpTopLevelDir_ << "\""; + journalLogRef_.log(JournalLog::LOG_WARN, oss.str()); + } +} + +EmptyFilePool* EmptyFilePoolPartition::getEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib, const bool createIfNonExistent) { + { + slock l(efpMapMutex_); 
+ efpMapItr_t i = efpMap_.find(efpDataSize_kib); + if (i != efpMap_.end()) + return i->second; + } + if (createIfNonExistent) { + return createEmptyFilePool(efpDataSize_kib); + } + return 0; +} + +void EmptyFilePoolPartition::getEmptyFilePools(std::vector<EmptyFilePool*>& efpList) { + slock l(efpMapMutex_); + for (efpMapItr_t i=efpMap_.begin(); i!=efpMap_.end(); ++i) { + efpList.push_back(i->second); + } +} + +void EmptyFilePoolPartition::getEmptyFilePoolSizes_kib(std::vector<efpDataSize_kib_t>& efpDataSizesList_kib) const { + slock l(efpMapMutex_); + for (efpMapConstItr_t i=efpMap_.begin(); i!=efpMap_.end(); ++i) { + efpDataSizesList_kib.push_back(i->first); + } +} + +std::string EmptyFilePoolPartition::getPartitionDirectory() const { + return partitionDir_; +} + +efpPartitionNumber_t EmptyFilePoolPartition::getPartitionNumber() const { + return partitionNum_; +} + +std::string EmptyFilePoolPartition::toString(const uint16_t indent) const { + std::string indentStr(indent, ' '); + std::stringstream oss; + oss << "EFP Partition " << partitionNum_ << ":" << std::endl; + oss << indentStr << "EFP Partition Analysis (partition " << partitionNum_ << " at \"" << partitionDir_ << "\"):" << std::endl; + if (efpMap_.empty()) { + oss << indentStr << "<Partition empty, no EFPs found>" << std::endl; + } else { + oss << indentStr << std::setw(12) << "efp_size_kib" + << std::setw(12) << "num_files" + << std::setw(18) << "tot_capacity_kib" << std::endl; + oss << indentStr << std::setw(12) << "------------" + << std::setw(12) << "----------" + << std::setw(18) << "----------------" << std::endl; + { + slock l(efpMapMutex_); + for (efpMapConstItr_t i=efpMap_.begin(); i!= efpMap_.end(); ++i) { + oss << indentStr << std::setw(12) << i->first + << std::setw(12) << i->second->numEmptyFiles() + << std::setw(18) << i->second->cumFileSize_kib() << std::endl; + } + } + } + return oss.str(); +} + +// static +std::string EmptyFilePoolPartition::getPartionDirectoryName(const 
efpPartitionNumber_t partitionNumber) { + std::ostringstream oss; + oss << "p" << std::setfill('0') << std::setw(3) << partitionNumber; + return oss.str(); +} + +//static +efpPartitionNumber_t EmptyFilePoolPartition::getPartitionNumber(const std::string& name) { + if (name.length() == 4 && name[0] == 'p' && ::isdigit(name[1]) && ::isdigit(name[2]) && ::isdigit(name[3])) { + long pn = ::strtol(name.c_str() + 1, 0, 10); + if (pn == 0 && errno) { + return 0; + } else { + return (efpPartitionNumber_t)pn; + } + } + return 0; +} + +// --- protected functions --- + +EmptyFilePool* EmptyFilePoolPartition::createEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib) { + std::string fqEfpDirectoryName(partitionDir_ + "/" + EmptyFilePoolPartition::s_efpTopLevelDir_ + "/" + EmptyFilePool::dirNameFromDataSize(efpDataSize_kib)); + return createEmptyFilePool(fqEfpDirectoryName); +} + +EmptyFilePool* EmptyFilePoolPartition::createEmptyFilePool(const std::string fqEfpDirectoryName) { + EmptyFilePool* efpp = 0; + try { + efpp = new EmptyFilePool(fqEfpDirectoryName, this, overwriteBeforeReturnFlag_, truncateFlag_, journalLogRef_); + { + slock l(efpMapMutex_); + efpMap_[efpp->dataSize_kib()] = efpp; + } + } + catch (const std::exception& e) { + if (efpp != 0) { + delete efpp; + efpp = 0; + } + std::ostringstream oss; + oss << "EmptyFilePool create failed: " << e.what(); + journalLogRef_.log(JournalLog::LOG_WARN, oss.str()); + } + if (efpp != 0) { + efpp->initialize(); + } + return efpp; +} + +void EmptyFilePoolPartition::validatePartitionDir() { + std::ostringstream ss; + if (!jdir::is_dir(partitionDir_)) { + ss << "Invalid partition directory: \'" << partitionDir_ << "\' is not a directory"; + throw jexception(jerrno::JERR_EFP_BADPARTITIONDIR, ss.str(), "EmptyFilePoolPartition", "validatePartitionDir"); + } + + // TODO: other validity checks here +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h 
b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h new file mode 100644 index 0000000000..570e2b073f --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolPartition.h @@ -0,0 +1,82 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_ +#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_ + +#include <map> +#include "qpid/linearstore/journal/EmptyFilePoolTypes.h" +#include "qpid/linearstore/journal/smutex.h" +#include <string> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +class EmptyFilePool; +class JournalLog; + +class EmptyFilePoolPartition +{ +public: + static const std::string s_efpTopLevelDir_; +protected: + typedef std::map<efpDataSize_kib_t, EmptyFilePool*> efpMap_t; + typedef efpMap_t::iterator efpMapItr_t; + typedef efpMap_t::const_iterator efpMapConstItr_t; + + const efpPartitionNumber_t partitionNum_; + const std::string partitionDir_; + const bool overwriteBeforeReturnFlag_; + const bool truncateFlag_; + JournalLog& journalLogRef_; + efpMap_t efpMap_; + smutex efpMapMutex_; + +public: + EmptyFilePoolPartition(const efpPartitionNumber_t partitionNum, + const std::string& partitionDir, + const bool overwriteBeforeReturnFlag, + const bool truncateFlag, + JournalLog& journalLogRef); + virtual ~EmptyFilePoolPartition(); + + void findEmptyFilePools(); + EmptyFilePool* getEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib, const bool createIfNonExistent); + void getEmptyFilePools(std::vector<EmptyFilePool*>& efpList); + void getEmptyFilePoolSizes_kib(std::vector<efpDataSize_kib_t>& efpDataSizesList) const; + std::string getPartitionDirectory() const; + efpPartitionNumber_t getPartitionNumber() const; + std::string toString(const uint16_t indent) const; + + static std::string getPartionDirectoryName(const efpPartitionNumber_t partitionNumber); + static efpPartitionNumber_t getPartitionNumber(const std::string& name); + +protected: + EmptyFilePool* createEmptyFilePool(const efpDataSize_kib_t efpDataSize_kib); + EmptyFilePool* createEmptyFilePool(const std::string fqEfpDirectoryName); + void validatePartitionDir(); +}; + +}}} + +#endif /* 
QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLPARTITION_H_ */ diff --git a/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h new file mode 100644 index 0000000000..4cae4e6538 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/EmptyFilePoolTypes.h @@ -0,0 +1,57 @@ + /* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
#ifndef QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_
#define QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_

#include <iostream>
#include <sstream>
#include <stdint.h>

namespace qpid {
namespace linearstore {
namespace journal {

typedef uint64_t efpDataSize_kib_t;     ///< Size of data part of file (excluding file header) in kib
typedef uint64_t efpFileSize_kib_t;     ///< Size of file (header + data) in kib
typedef uint32_t efpDataSize_sblks_t;   ///< Size of data part of file (excluding file header) in sblks
typedef uint32_t efpFileSize_sblks_t;   ///< Size of file (header + data) in sblks
typedef uint32_t efpFileCount_t;        ///< Number of files in a partition or pool
typedef uint16_t efpPartitionNumber_t;  ///< Number assigned to a partition

/**
 * Identifies an empty file pool by its partition number and data size.
 *
 * Copy construction and copy assignment are left to the compiler: both members
 * are plain integers, so the implicit member-wise versions are exactly right.
 */
struct efpIdentity_t {
    efpPartitionNumber_t pn_;   ///< Partition number
    efpDataSize_kib_t ds_;      ///< Data size in kib

    efpIdentity_t() : pn_(0), ds_(0) {}
    efpIdentity_t(efpPartitionNumber_t pn, efpDataSize_kib_t ds) : pn_(pn), ds_(ds) {}

    /// Writes the identity as "<pn>,<ds>".
    friend std::ostream& operator<<(std::ostream& os, const efpIdentity_t& id) {
        // This two-stage write allows this << operator to be used with std::setw() for formatted writes
        std::ostringstream oss;
        oss << id.pn_ << "," << id.ds_;
        os << oss.str();
        return os;
    }
};

}}}

#endif /* QPID_LINEARSTORE_JOURNAL_EMPTYFILEPOOLTYPES_H_ */
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/JournalFile.h" + +#include <fcntl.h> +#include "qpid/linearstore/journal/jcfg.h" +#include "qpid/linearstore/journal/pmgr.h" +#include "qpid/linearstore/journal/utils/file_hdr.h" +#include <unistd.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +JournalFile::JournalFile(const std::string& fqFileName, + const efpIdentity_t& efpIdentity, + const uint64_t fileSeqNum, + const std::string queueName) : + efpIdentity_(efpIdentity), + fqFileName_(fqFileName), + fileSeqNum_(fileSeqNum), + queueName_(queueName), + serial_(getRandom64()), + firstRecordOffset_(0ULL), + fileHandle_(-1), + fileCloseFlag_(false), + fileHeaderBasePtr_ (0), + fileHeaderPtr_(0), + aioControlBlockPtr_(0), + fileSize_dblks_(((efpIdentity.ds_ * 1024) + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES)) / QLS_DBLK_SIZE_BYTES), + initializedFlag_(false), + enqueuedRecordCount_("JournalFile::enqueuedRecordCount", 0), + submittedDblkCount_("JournalFile::submittedDblkCount", 0), + completedDblkCount_("JournalFile::completedDblkCount", 0), + outstandingAioOpsCount_("JournalFile::outstandingAioOpsCount", 0) +{} + +JournalFile::JournalFile(const std::string& fqFileName, + const ::file_hdr_t& fileHeader, + const std::string queueName) : + efpIdentity_(fileHeader._efp_partition, fileHeader._data_size_kib), + fqFileName_(fqFileName), + 
fileSeqNum_(fileHeader._file_number), + queueName_(queueName), + serial_(fileHeader._rhdr._serial), + firstRecordOffset_(fileHeader._fro), + fileHandle_(-1), + fileCloseFlag_(false), + fileHeaderBasePtr_ (0), + fileHeaderPtr_(0), + aioControlBlockPtr_(0), + fileSize_dblks_(((fileHeader._data_size_kib * 1024) + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES)) / QLS_DBLK_SIZE_BYTES), + initializedFlag_(false), + enqueuedRecordCount_("JournalFile::enqueuedRecordCount", 0), + submittedDblkCount_("JournalFile::submittedDblkCount", 0), + completedDblkCount_("JournalFile::completedDblkCount", 0), + outstandingAioOpsCount_("JournalFile::outstandingAioOpsCount", 0) +{} + +JournalFile::~JournalFile() { + finalize(); +} + +void +JournalFile::initialize(const uint32_t completedDblkCount) { + if (!initializedFlag_) { + if (::posix_memalign(&fileHeaderBasePtr_, QLS_AIO_ALIGN_BOUNDARY_BYTES, QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024)) + { + std::ostringstream oss; + oss << "posix_memalign(): blksize=" << QLS_AIO_ALIGN_BOUNDARY_BYTES << " size=" << (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024); + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "JournalFile", "initialize"); + } + fileHeaderPtr_ = (::file_hdr_t*)fileHeaderBasePtr_; + aioControlBlockPtr_ = new aio_cb; + initializedFlag_ = true; + } + if (completedDblkCount > 0UL) { + submittedDblkCount_.set(completedDblkCount); + completedDblkCount_.set(completedDblkCount); + } +} + +void +JournalFile::finalize() { + if (fileHeaderBasePtr_ != 0) { + std::free(fileHeaderBasePtr_); + fileHeaderBasePtr_ = 0; + fileHeaderPtr_ = 0; + } + if (aioControlBlockPtr_ != 0) { + delete(aioControlBlockPtr_); + aioControlBlockPtr_ = 0; + } +} + +const std::string JournalFile::getFqFileName() const { + return fqFileName_; +} + +uint64_t JournalFile::getFileSeqNum() const { + return fileSeqNum_; +} + +uint64_t JournalFile::getSerial() const { + return serial_; +} + +int 
JournalFile::open() { + fileHandle_ = ::open(fqFileName_.c_str(), O_WRONLY | O_DIRECT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); // 0644 -rw-r--r-- + if (fileHandle_ < 0) { + std::ostringstream oss; + oss << "file=\"" << fqFileName_ << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JNLF_OPEN, oss.str(), "JournalFile", "open"); + } + return fileHandle_; +} + +void JournalFile::close() { + if (fileHandle_ >= 0) { + if (getOutstandingAioDblks()) { + fileCloseFlag_ = true; // Close later when all outstanding AIOs have returned + } else { + int res = ::close(fileHandle_); + fileHandle_ = -1; + if (res != 0) { + std::ostringstream oss; + oss << "file=\"" << fqFileName_ << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JNLF_CLOSE, oss.str(), "JournalFile", "open"); + } + } + } +} + +void JournalFile::asyncFileHeaderWrite(io_context_t ioContextPtr, + const efpPartitionNumber_t efpPartitionNumber, + const efpDataSize_kib_t efpDataSize_kib, + const uint16_t userFlags, + const uint64_t recordId, + const uint64_t firstRecordOffset) { + firstRecordOffset_ = firstRecordOffset; + ::file_hdr_create(fileHeaderPtr_, QLS_FILE_MAGIC, QLS_JRNL_VERSION, QLS_JRNL_FHDR_RES_SIZE_SBLKS, efpPartitionNumber, efpDataSize_kib); + ::file_hdr_init(fileHeaderBasePtr_, + QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024, + userFlags, + serial_, + recordId, + firstRecordOffset, + fileSeqNum_, + queueName_.size(), + queueName_.data()); + const std::size_t wr_size = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024; + if (!isOpen()) open(); + aio::prep_pwrite(aioControlBlockPtr_, fileHandle_, (void*)fileHeaderBasePtr_, wr_size, 0UL); + if (!aio::is_aligned(aioControlBlockPtr_->u.c.buf, QLS_AIO_ALIGN_BOUNDARY_BYTES)) { + std::ostringstream oss; + oss << "AIO operation on misaligned buffer: iocb->u.c.buf=" << aioControlBlockPtr_->u.c.buf << std::endl; + throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncFileHeaderWrite"); + } + if 
(aio::submit(ioContextPtr, 1, &aioControlBlockPtr_) < 0) { + std::ostringstream oss; + oss << "queue=\"" << queueName_ << "\" fid=0x" << std::hex << fileSeqNum_ << " wr_size=0x" << wr_size << " foffs=0x0"; + throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncFileHeaderWrite"); + } + addSubmittedDblkCount(QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS); + incrOutstandingAioOperationCount(); +} + +void JournalFile::asyncPageWrite(io_context_t ioContextPtr, + aio_cb* aioControlBlockPtr, + void* data, + uint32_t dataSize_dblks) { + const std::size_t wr_size = dataSize_dblks * QLS_DBLK_SIZE_BYTES; + const uint64_t foffs = submittedDblkCount_.get() * QLS_DBLK_SIZE_BYTES; + if (!isOpen()) open(); + aio::prep_pwrite_2(aioControlBlockPtr, fileHandle_, data, wr_size, foffs); + if (!aio::is_aligned(aioControlBlockPtr->u.c.buf, QLS_AIO_ALIGN_BOUNDARY_BYTES)) { + std::ostringstream oss; + oss << "AIO operation on misaligned buffer: iocb->u.c.buf=" << aioControlBlockPtr->u.c.buf << std::endl; + throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncPageWrite"); + } + pmgr::page_cb* pcbp = (pmgr::page_cb*)(aioControlBlockPtr->data); // This page's control block (pcb) + pcbp->_wdblks = dataSize_dblks; + pcbp->_jfp = this; + if (aio::submit(ioContextPtr, 1, &aioControlBlockPtr) < 0) { + std::ostringstream oss; + oss << "queue=\"" << queueName_ << "\" fid=0x" << std::hex << fileSeqNum_ << " wr_size=0x" << wr_size << " foffs=0x" << foffs; + throw jexception(jerrno::JERR__AIO, oss.str(), "JournalFile", "asyncPageWrite"); + } + addSubmittedDblkCount(dataSize_dblks); + incrOutstandingAioOperationCount(); +} + +uint32_t JournalFile::getEnqueuedRecordCount() const { + return enqueuedRecordCount_.get(); +} + +uint32_t JournalFile::incrEnqueuedRecordCount() { + return enqueuedRecordCount_.increment(); +} + +uint32_t JournalFile::decrEnqueuedRecordCount() { + return enqueuedRecordCount_.decrementLimit(); +} + +uint32_t 
JournalFile::addCompletedDblkCount(const uint32_t a) { + return completedDblkCount_.addLimit(a, submittedDblkCount_.get(), jerrno::JERR_JNLF_CMPLOFFSOVFL); +} + +uint16_t JournalFile::getOutstandingAioOperationCount() const { + return outstandingAioOpsCount_.get(); +} + +uint16_t JournalFile::decrOutstandingAioOperationCount() { + uint16_t r = outstandingAioOpsCount_.decrementLimit(); + if (fileCloseFlag_ && outstandingAioOpsCount_ == 0) { // Delayed close + close(); + } + return r; +} + +efpIdentity_t JournalFile::getEfpIdentity() const { + return efpIdentity_; +} + +uint64_t JournalFile::getFirstRecordOffset() const { + return firstRecordOffset_; +} + +void JournalFile::setFirstRecordOffset(const uint64_t firstRecordOffset) { + firstRecordOffset_ = firstRecordOffset; +} + +// --- Status helper functions --- + +bool JournalFile::isEmpty() const { + return submittedDblkCount_ == 0; +} + +bool JournalFile::isNoEnqueuedRecordsRemaining() const { + return /*!enqueueStarted_ &&*/ // Not part-way through encoding an enqueue + isFullAndComplete() && // Full with all AIO returned + enqueuedRecordCount_ == 0; // No remaining enqueued records +} + +// debug aid +const std::string JournalFile::status_str(const uint8_t indentDepth) const { + std::string indent((size_t)indentDepth, '.'); + std::ostringstream oss; + oss << indent << "JournalFile: fileName=" << getFileName() << std::endl; + oss << indent << " directory=" << getDirectory() << std::endl; + oss << indent << " fileSizeDblks=" << fileSize_dblks_ << std::endl; + oss << indent << " open=" << (isOpen() ? 
"T" : "F") << std::endl; + oss << indent << " fileHandle=" << fileHandle_ << std::endl; + oss << indent << " enqueuedRecordCount=" << getEnqueuedRecordCount() << std::endl; + oss << indent << " submittedDblkCount=" << getSubmittedDblkCount() << std::endl; + oss << indent << " completedDblkCount=" << getCompletedDblkCount() << std::endl; + oss << indent << " outstandingAioOpsCount=" << getOutstandingAioOperationCount() << std::endl; + oss << indent << " isEmpty()=" << (isEmpty() ? "T" : "F") << std::endl; + oss << indent << " isDataEmpty()=" << (isDataEmpty() ? "T" : "F") << std::endl; + oss << indent << " dblksRemaining()=" << dblksRemaining() << std::endl; + oss << indent << " isFull()=" << (isFull() ? "T" : "F") << std::endl; + oss << indent << " isFullAndComplete()=" << (isFullAndComplete() ? "T" : "F") << std::endl; + oss << indent << " getOutstandingAioDblks()=" << getOutstandingAioDblks() << std::endl; + oss << indent << " getNextFile()=" << (getNextFile() ? "T" : "F") << std::endl; + return oss.str(); +} + +// --- protected functions --- + +const std::string JournalFile::getDirectory() const { + return fqFileName_.substr(0, fqFileName_.rfind('/')); +} + +const std::string JournalFile::getFileName() const { + return fqFileName_.substr(fqFileName_.rfind('/')+1); +} + +//static +uint64_t JournalFile::getRandom64() { + // TODO: ::rand() is not thread safe, either lock or use rand_r(seed) with a thread-local seed. 
+ return ((uint64_t)::rand() << QLS_RAND_SHIFT1) | ((uint64_t)::rand() << QLS_RAND_SHIFT2) | (::rand() & QLS_RAND_MASK); +} + +bool JournalFile::isOpen() const { + return fileHandle_ >= 0; +} + +uint32_t JournalFile::getSubmittedDblkCount() const { + return submittedDblkCount_.get(); +} + +uint32_t JournalFile::addSubmittedDblkCount(const uint32_t a) { + return submittedDblkCount_.addLimit(a, fileSize_dblks_, jerrno::JERR_JNLF_FILEOFFSOVFL); +} + +uint32_t JournalFile::getCompletedDblkCount() const { + return completedDblkCount_.get(); +} + +uint16_t JournalFile::incrOutstandingAioOperationCount() { + return outstandingAioOpsCount_.increment(); +} + +u_int32_t JournalFile::dblksRemaining() const { + return fileSize_dblks_ - submittedDblkCount_; +} + +bool JournalFile::getNextFile() const { + return isFull(); +} + +u_int32_t JournalFile::getOutstandingAioDblks() const { + return submittedDblkCount_ - completedDblkCount_; +} + +bool JournalFile::isDataEmpty() const { + return submittedDblkCount_ <= QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS; +} + +bool JournalFile::isFull() const { + return submittedDblkCount_ == fileSize_dblks_; +} + +bool JournalFile::isFullAndComplete() const { + return completedDblkCount_ == fileSize_dblks_; +} + + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h new file mode 100644 index 0000000000..e33830ef7f --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/JournalFile.h @@ -0,0 +1,132 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_ +#define QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_ + +#include "qpid/linearstore/journal/aio.h" +#include "qpid/linearstore/journal/AtomicCounter.h" +#include "qpid/linearstore/journal/EmptyFilePoolTypes.h" + +class file_hdr_t; + +namespace qpid { +namespace linearstore { +namespace journal { + +class JournalFile +{ +protected: + const efpIdentity_t efpIdentity_; + const std::string fqFileName_; + const uint64_t fileSeqNum_; + const std::string queueName_; + const uint64_t serial_; + uint64_t firstRecordOffset_; + int fileHandle_; + bool fileCloseFlag_; + void* fileHeaderBasePtr_; + ::file_hdr_t* fileHeaderPtr_; + aio_cb* aioControlBlockPtr_; + uint32_t fileSize_dblks_; ///< File size in data blocks, including file header + bool initializedFlag_; + + AtomicCounter<uint32_t> enqueuedRecordCount_; ///< Count of enqueued records + AtomicCounter<uint32_t> submittedDblkCount_; ///< Write file count (data blocks) for submitted AIO + AtomicCounter<uint32_t> completedDblkCount_; ///< Write file count (data blocks) for completed AIO + AtomicCounter<uint16_t> outstandingAioOpsCount_; ///< Outstanding AIO operations on this file + +public: + // Constructor for creating new file with known fileSeqNum and random serial + JournalFile(const std::string& fqFileName, + const efpIdentity_t& efpIdentity, + const uint64_t fileSeqNum, + const std::string queueName); + // Constructor for recovery in which fileSeqNum and serial are recovered from fileHeader param + JournalFile(const std::string& fqFileName, + 
const ::file_hdr_t& fileHeader, + const std::string queueName); + virtual ~JournalFile(); + + void initialize(const uint32_t completedDblkCount); + void finalize(); + + const std::string getFqFileName() const; + uint64_t getFileSeqNum() const; + uint64_t getSerial() const; + + int open(); + void close(); + void asyncFileHeaderWrite(io_context_t ioContextPtr, + const efpPartitionNumber_t efpPartitionNumber, + const efpDataSize_kib_t efpDataSize_kib, + const uint16_t userFlags, + const uint64_t recordId, + const uint64_t firstRecordOffset); + void asyncPageWrite(io_context_t ioContextPtr, + aio_cb* aioControlBlockPtr, + void* data, + uint32_t dataSize_dblks); + + uint32_t getSubmittedDblkCount() const; + uint32_t getEnqueuedRecordCount() const; + uint32_t incrEnqueuedRecordCount(); + uint32_t decrEnqueuedRecordCount(); + + uint32_t addCompletedDblkCount(const uint32_t a); + + uint16_t getOutstandingAioOperationCount() const; + uint16_t decrOutstandingAioOperationCount(); + + efpIdentity_t getEfpIdentity() const; + uint64_t getFirstRecordOffset() const; + void setFirstRecordOffset(const uint64_t firstRecordOffset); + + // Status helper functions + bool isEmpty() const; ///< True if no writes of any kind have occurred + bool isNoEnqueuedRecordsRemaining() const; ///< True when all enqueued records (or parts) have been dequeued + + // debug aid + const std::string status_str(const uint8_t indentDepth) const; + +protected: + const std::string getDirectory() const; + const std::string getFileName() const; + static uint64_t getRandom64(); + bool isOpen() const; + + uint32_t addSubmittedDblkCount(const uint32_t a); + + uint32_t getCompletedDblkCount() const; + + uint16_t incrOutstandingAioOperationCount(); + + u_int32_t dblksRemaining() const; ///< Dblks remaining until full + bool getNextFile() const; ///< True when next file is needed + u_int32_t getOutstandingAioDblks() const; ///< Dblks still to be written + bool isDataEmpty() const; ///< True if only file header 
written, data is still empty + bool isFull() const; ///< True if all possible dblks have been submitted (but may not yet have returned from AIO) + bool isFullAndComplete() const; ///< True if all submitted dblks have returned from AIO +}; + +}}} + +#endif // QPID_LINEARSTORE_JOURNAL_JOURNALFILE_H_ diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp new file mode 100644 index 0000000000..c35ec97e91 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.cpp @@ -0,0 +1,63 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/JournalLog.h" + +#include <iostream> + +namespace qpid { +namespace linearstore { +namespace journal { + +JournalLog::JournalLog(log_level_t logLevelThreshold) : logLevelThreshold_(logLevelThreshold) {} + +JournalLog::~JournalLog() {} + +void JournalLog::log(const log_level_t logLevel, + const std::string& logStatement) const { + if (logLevel >= logLevelThreshold_) { + std::cerr << log_level_str(logLevel) << ": " << logStatement << std::endl; + } +} + +void JournalLog::log(log_level_t logLevel, + const std::string& journalId, + const std::string& logStatement) const { + if (logLevel >= logLevelThreshold_) { + std::cerr << log_level_str(logLevel) << ": Journal \"" << journalId << "\": " << logStatement << std::endl; + } +} + +const char* JournalLog::log_level_str(log_level_t logLevel) { + switch (logLevel) + { + case LOG_TRACE: return "TRACE"; + case LOG_DEBUG: return "DEBUG"; + case LOG_INFO: return "INFO"; + case LOG_NOTICE: return "NOTICE"; + case LOG_WARN: return "WARN"; + case LOG_ERROR: return "ERROR"; + case LOG_CRITICAL: return "CRITICAL"; + } + return "<log level unknown>"; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h new file mode 100644 index 0000000000..cf503cb9d2 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/JournalLog.h @@ -0,0 +1,60 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_ +#define QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_ + +#include <string> + +namespace qpid { +namespace linearstore { +namespace journal { + +class JournalLog +{ +public: + typedef enum _log_level { + LOG_TRACE = 0, + LOG_DEBUG, + LOG_INFO, + LOG_NOTICE, + LOG_WARN, + LOG_ERROR, + LOG_CRITICAL + } log_level_t; + +protected: + const log_level_t logLevelThreshold_; + +public: + JournalLog(log_level_t logLevelThreshold); + virtual ~JournalLog(); + virtual void log(const log_level_t logLevel, + const std::string& logStatement) const; + virtual void log(const log_level_t logLevel, + const std::string& journalId, + const std::string& logStatement) const; + static const char* log_level_str(const log_level_t logLevel); +}; + +}}} + +#endif // QPID_LINEARSTORE_JOURNAL_JOURNALLOG_H_ diff --git a/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp new file mode 100644 index 0000000000..08d565ca2e --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.cpp @@ -0,0 +1,243 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/LinearFileController.h" + +#include "qpid/linearstore/journal/EmptyFilePool.h" +#include "qpid/linearstore/journal/jcntl.h" +#include "qpid/linearstore/journal/JournalFile.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +LinearFileController::LinearFileController(jcntl& jcntlRef) : + jcntlRef_(jcntlRef), + emptyFilePoolPtr_(0), + fileSeqCounter_("LinearFileController::fileSeqCounter", 0), + recordIdCounter_("LinearFileController::recordIdCounter", 0), + decrCounter_("LinearFileController::decrCounter", 0), + currentJournalFilePtr_(0) +{} + +LinearFileController::~LinearFileController() {} + +void LinearFileController::initialize(const std::string& journalDirectory, + EmptyFilePool* emptyFilePoolPtr, + uint64_t initialFileNumberVal) { + journalDirectory_.assign(journalDirectory); + emptyFilePoolPtr_ = emptyFilePoolPtr; + fileSeqCounter_.set(initialFileNumberVal); +} + +void LinearFileController::finalize() { + if (currentJournalFilePtr_) { + currentJournalFilePtr_->close(); + currentJournalFilePtr_ = 0; + } + while (!journalFileList_.empty()) { + delete journalFileList_.front(); + journalFileList_.pop_front(); + } +} + +void LinearFileController::addJournalFile(JournalFile* journalFilePtr, + const uint32_t completedDblkCount, + const bool makeCurrentFlag) { + if (makeCurrentFlag && currentJournalFilePtr_) { + currentJournalFilePtr_->close(); + currentJournalFilePtr_ = 0; + } + journalFilePtr->initialize(completedDblkCount); + { + slock l(journalFileListMutex_); + 
journalFileList_.push_back(journalFilePtr); + } + if (makeCurrentFlag) { + currentJournalFilePtr_ = journalFilePtr; + } +} + +efpDataSize_sblks_t LinearFileController::dataSize_sblks() const { + return emptyFilePoolPtr_->dataSize_sblks(); +} + +efpFileSize_sblks_t LinearFileController::fileSize_sblks() const { + return emptyFilePoolPtr_->fileSize_sblks(); +} + +void LinearFileController::getNextJournalFile() { + if (currentJournalFilePtr_) + currentJournalFilePtr_->close(); + pullEmptyFileFromEfp(); +} + +uint64_t LinearFileController::getNextRecordId() { + return recordIdCounter_.increment(); +} + +void LinearFileController::removeFileToEfp(const std::string& fileName) { + if (emptyFilePoolPtr_) { + emptyFilePoolPtr_->returnEmptyFileSymlink(fileName); + } +} + +void LinearFileController::restoreEmptyFile(const std::string& fileName) { + // TODO: Add checks that this file is of a valid size; if not, delete this and get one from the EFP + addJournalFile(fileName, emptyFilePoolPtr_->getIdentity(), getNextFileSeqNum(), 0); +} + +void LinearFileController::purgeEmptyFilesToEfp() { + slock l(journalFileListMutex_); + while (journalFileList_.front()->isNoEnqueuedRecordsRemaining() && journalFileList_.size() > 1) { // Can't purge last file, even if it has no enqueued records + emptyFilePoolPtr_->returnEmptyFileSymlink(journalFileList_.front()->getFqFileName()); + delete journalFileList_.front(); + journalFileList_.pop_front(); + } +} + +uint32_t LinearFileController::getEnqueuedRecordCount(const uint64_t fileSeqNumber) { + return find(fileSeqNumber)->getEnqueuedRecordCount(); +} + +uint32_t LinearFileController::incrEnqueuedRecordCount(const uint64_t fileSeqNumber) { + return find(fileSeqNumber)->incrEnqueuedRecordCount(); +} + +uint32_t LinearFileController::decrEnqueuedRecordCount(const uint64_t fileSeqNumber) { + uint32_t r = find(fileSeqNumber)->decrEnqueuedRecordCount(); + + // TODO: Re-evaluate after testing and profiling + // This is the first go at implementing 
auto-purge, which checks for all trailing empty files and recycles + // them back to the EFP. This version checks every 100 decrements using decrCounter_ (an action which releases + // records). We need to check this rather simple scheme works for outlying scenarios (large and tiny data + // records) without impacting performance or performing badly (leaving excessive empty files in the journals). + if (decrCounter_.increment() % 100ULL == 0ULL) { + purgeEmptyFilesToEfp(); + } + return r; +} + +uint32_t LinearFileController::addWriteCompletedDblkCount(const uint64_t fileSeqNumber, const uint32_t a) { + return find(fileSeqNumber)->addCompletedDblkCount(a); +} + +uint16_t LinearFileController::decrOutstandingAioOperationCount(const uint64_t fileSeqNumber) { + return find(fileSeqNumber)->decrOutstandingAioOperationCount(); +} + +void LinearFileController::asyncFileHeaderWrite(io_context_t ioContextPtr, + const uint16_t userFlags, + const uint64_t recordId, + const uint64_t firstRecordOffset) { + currentJournalFilePtr_->asyncFileHeaderWrite(ioContextPtr, + emptyFilePoolPtr_->getPartitionNumber(), + emptyFilePoolPtr_->dataSize_kib(), + userFlags, + recordId, + firstRecordOffset); +} + +void LinearFileController::asyncPageWrite(io_context_t ioContextPtr, + aio_cb* aioControlBlockPtr, + void* data, + uint32_t dataSize_dblks) { + assertCurrentJournalFileValid("asyncPageWrite"); + currentJournalFilePtr_->asyncPageWrite(ioContextPtr, aioControlBlockPtr, data, dataSize_dblks); +} + +uint64_t LinearFileController::getCurrentFileSeqNum() const { + assertCurrentJournalFileValid("getCurrentFileSeqNum"); + return currentJournalFilePtr_->getFileSeqNum(); +} + +uint64_t LinearFileController::getCurrentSerial() const { + assertCurrentJournalFileValid("getCurrentSerial"); + return currentJournalFilePtr_->getSerial(); +} + +bool LinearFileController::isEmpty() const { + assertCurrentJournalFileValid("isEmpty"); + return currentJournalFilePtr_->isEmpty(); +} + +const std::string 
LinearFileController::status(const uint8_t indentDepth) const { + std::string indent((size_t)indentDepth, '.'); + std::ostringstream oss; + oss << indent << "LinearFileController: queue=" << jcntlRef_.id() << std::endl; + oss << indent << " journalDirectory=" << journalDirectory_ << std::endl; + oss << indent << " fileSeqCounter=" << fileSeqCounter_.get() << std::endl; + oss << indent << " recordIdCounter=" << recordIdCounter_.get() << std::endl; + oss << indent << " journalFileList.size=" << journalFileList_.size() << std::endl; + if (checkCurrentJournalFileValid()) { + oss << currentJournalFilePtr_->status_str(indentDepth+2); + } else { + oss << indent << " <No current journal file>" << std::endl; + } + return oss.str(); +} + +// --- protected functions --- + +void LinearFileController::addJournalFile(const std::string& fileName, + const efpIdentity_t& efpIdentity, + const uint64_t fileSeqNumber, + const uint32_t completedDblkCount) { + JournalFile* jfp = new JournalFile(fileName, efpIdentity, fileSeqNumber, jcntlRef_.id()); + addJournalFile(jfp, completedDblkCount, true); +} + +void LinearFileController::assertCurrentJournalFileValid(const char* const functionName) const { + if (!checkCurrentJournalFileValid()) { + throw jexception(jerrno::JERR__NULL, "LinearFileController", functionName); + } +} + +bool LinearFileController::checkCurrentJournalFileValid() const { + return currentJournalFilePtr_ != 0; +} + +JournalFile* LinearFileController::find(const uint64_t fileSeqNumber) { + if (currentJournalFilePtr_ && currentJournalFilePtr_->getFileSeqNum() == fileSeqNumber) + return currentJournalFilePtr_; + + slock l(journalFileListMutex_); + for (JournalFileListItr_t i=journalFileList_.begin(); i!=journalFileList_.end(); ++i) { + if ((*i)->getFileSeqNum() == fileSeqNumber) { + return *i; + } + } + + std::ostringstream oss; + oss << "fileSeqNumber=" << fileSeqNumber; + throw jexception(jerrno::JERR_LFCR_SEQNUMNOTFOUND, oss.str(), "LinearFileController", "find"); +} + 
+uint64_t LinearFileController::getNextFileSeqNum() { + return fileSeqCounter_.increment(); +} + +void LinearFileController::pullEmptyFileFromEfp() { + std::string efn = emptyFilePoolPtr_->takeEmptyFile(journalDirectory_); // Moves file from EFP only (ie no file init), returns new file name + addJournalFile(efn, emptyFilePoolPtr_->getIdentity(), getNextFileSeqNum(), 0); +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h new file mode 100644 index 0000000000..3cdfb72a37 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/LinearFileController.h @@ -0,0 +1,119 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_ +#define QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_ + +#include <deque> +#include "qpid/linearstore/journal/aio.h" +#include "qpid/linearstore/journal/AtomicCounter.h" +#include "qpid/linearstore/journal/EmptyFilePoolTypes.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +class EmptyFilePool; +class jcntl; +class JournalFile; + +class LinearFileController +{ +protected: + typedef std::deque<JournalFile*> JournalFileList_t; + typedef JournalFileList_t::iterator JournalFileListItr_t; + + jcntl& jcntlRef_; + std::string journalDirectory_; + EmptyFilePool* emptyFilePoolPtr_; + AtomicCounter<uint64_t> fileSeqCounter_; + AtomicCounter<uint64_t> recordIdCounter_; + AtomicCounter<uint64_t> decrCounter_; + + JournalFileList_t journalFileList_; + JournalFile* currentJournalFilePtr_; + smutex journalFileListMutex_; + +public: + LinearFileController(jcntl& jcntlRef); + virtual ~LinearFileController(); + + void initialize(const std::string& journalDirectory, + EmptyFilePool* emptyFilePoolPtr, + uint64_t initialFileNumberVal); + void finalize(); + + void addJournalFile(JournalFile* journalFilePtr, + const uint32_t completedDblkCount, + const bool makeCurrentFlag); + + efpDataSize_sblks_t dataSize_sblks() const; + efpFileSize_sblks_t fileSize_sblks() const; + void getNextJournalFile(); + uint64_t getNextRecordId(); + void removeFileToEfp(const std::string& fileName); + void restoreEmptyFile(const std::string& fileName); + void purgeEmptyFilesToEfp(); + + // Functions for manipulating counts of non-current JournalFile instances in journalFileList_ + uint32_t getEnqueuedRecordCount(const uint64_t fileSeqNumber); + uint32_t incrEnqueuedRecordCount(const uint64_t fileSeqNumber); + uint32_t decrEnqueuedRecordCount(const uint64_t fileSeqNumber); + uint32_t addWriteCompletedDblkCount(const uint64_t fileSeqNumber, + const uint32_t a); + uint16_t decrOutstandingAioOperationCount(const uint64_t 
fileSeqNumber); + + // Pass-through functions for current JournalFile class + void asyncFileHeaderWrite(io_context_t ioContextPtr, + const uint16_t userFlags, + const uint64_t recordId, + const uint64_t firstRecordOffset); + void asyncPageWrite(io_context_t ioContextPtr, + aio_cb* aioControlBlockPtr, + void* data, + uint32_t dataSize_dblks); + + uint64_t getCurrentFileSeqNum() const; + uint64_t getCurrentSerial() const; + bool isEmpty() const; + + // Debug aid + const std::string status(const uint8_t indentDepth) const; + +protected: + void addJournalFile(const std::string& fileName, + const efpIdentity_t& efpIdentity, + const uint64_t fileSeqNumber, + const uint32_t completedDblkCount); + void assertCurrentJournalFileValid(const char* const functionName) const; + bool checkCurrentJournalFileValid() const; + JournalFile* find(const uint64_t fileSeqNumber); + uint64_t getNextFileSeqNum(); + void pullEmptyFileFromEfp(); +}; + +typedef void (LinearFileController::*lfcAddJournalFileFn)(JournalFile* journalFilePtr, + const uint32_t completedDblkCount, + const bool makeCurrentFlag); + +}}} + +#endif // QPID_LINEARSTORE_JOURNAL_LINEARFILECONTROLLER_H_ diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp new file mode 100644 index 0000000000..254566e824 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp @@ -0,0 +1,949 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/RecoveryManager.h" + +#include <algorithm> +#include <cstdlib> +#include <iomanip> +#include "qpid/linearstore/journal/Checksum.h" +#include "qpid/linearstore/journal/data_tok.h" +#include "qpid/linearstore/journal/deq_rec.h" +#include "qpid/linearstore/journal/EmptyFilePool.h" +#include "qpid/linearstore/journal/EmptyFilePoolManager.h" +#include "qpid/linearstore/journal/enq_map.h" +#include "qpid/linearstore/journal/enq_rec.h" +#include "qpid/linearstore/journal/jcfg.h" +#include "qpid/linearstore/journal/jdir.h" +#include "qpid/linearstore/journal/JournalFile.h" +#include "qpid/linearstore/journal/JournalLog.h" +#include "qpid/linearstore/journal/jrec.h" +#include "qpid/linearstore/journal/LinearFileController.h" +#include "qpid/linearstore/journal/txn_map.h" +#include "qpid/linearstore/journal/txn_rec.h" +#include "qpid/linearstore/journal/utils/enq_hdr.h" +#include "qpid/linearstore/journal/utils/file_hdr.h" +#include <sstream> +#include <string> +#include <unistd.h> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +RecoveredRecordData_t::RecoveredRecordData_t(const uint64_t rid, const uint64_t fid, const std::streampos foffs, bool ptxn) : + recordId_(rid), + fileId_(fid), + fileOffset_(foffs), + pendingTransaction_(ptxn) +{} + +bool recordIdListCompare(RecoveredRecordData_t a, RecoveredRecordData_t b) { + return a.recordId_ < b.recordId_; +} + +RecoveredFileData_t::RecoveredFileData_t(JournalFile* journalFilePtr, const uint32_t completedDblkCount) : + 
journalFilePtr_(journalFilePtr), + completedDblkCount_(completedDblkCount) +{} + +RecoveryManager::RecoveryManager(const std::string& journalDirectory, + const std::string& queuename, + enq_map& enqueueMapRef, + txn_map& transactionMapRef, + JournalLog& journalLogRef) : + journalDirectory_(journalDirectory), + queueName_(queuename), + enqueueMapRef_(enqueueMapRef), + transactionMapRef_(transactionMapRef), + journalLogRef_(journalLogRef), + journalEmptyFlag_(false), + firstRecordOffset_(0), + endOffset_(0), + highestRecordId_(0ULL), + highestFileNumber_(0ULL), + lastFileFullFlag_(false), + initial_fid_(0), + currentSerial_(0), + efpFileSize_kib_(0) +{} + +RecoveryManager::~RecoveryManager() { + for (fileNumberMapItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) { + delete i->second; + } + fileNumberMap_.clear(); +} + +void RecoveryManager::analyzeJournals(const std::vector<std::string>* preparedTransactionListPtr, + EmptyFilePoolManager* emptyFilePoolManager, + EmptyFilePool** emptyFilePoolPtrPtr) { + // Analyze file headers of existing journal files + efpIdentity_t efpIdentity; + analyzeJournalFileHeaders(efpIdentity); + + if (journalEmptyFlag_) { + if (uninitFileList_.empty()) { + *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(0, 0); // Use default EFP + } else { + *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(efpIdentity); + } + } else { + *emptyFilePoolPtrPtr = emptyFilePoolManager->getEmptyFilePool(efpIdentity); + if (! *emptyFilePoolPtrPtr) { + // TODO: At a later time, this could be used to establish a new pool size provided the partition exists. + // If the partition does not exist, this is always an error. For now, throw an exception, as this should + // not occur in any practical application. Once multiple partitions and mixed EFPs are supported, this + // needs to be resolved. 
Note that EFP size is always a multiple of QLS_SBLK_SIZE_BYTES (currently 4096 + // bytes, any other value cannot be used and should be rejected as an error. + std::ostringstream oss; + oss << "Invalid EFP identity: Partition=" << efpIdentity.pn_ << " Size=" << efpIdentity.ds_ << "k"; + throw jexception(jerrno::JERR_RCVM_INVALIDEFPID, oss.str(), "RecoveryManager", "analyzeJournals"); + } + efpFileSize_kib_ = (*emptyFilePoolPtrPtr)->fileSize_kib(); + + // Read all records, establish remaining enqueued records + if (inFileStream_.is_open()) { + inFileStream_.close(); + } + while (getNextRecordHeader()) {} + if (inFileStream_.is_open()) { + inFileStream_.close(); + } + + // Check for file full condition + lastFileFullFlag_ = endOffset_ == (std::streamoff)(*emptyFilePoolPtrPtr)->fileSize_kib() * 1024; + + // Remove leading files which have no enqueued records + removeEmptyFiles(*emptyFilePoolPtrPtr); + + // Remove all txns from tmap that are not in the prepared list + if (preparedTransactionListPtr) { + std::vector<std::string> xidList; + transactionMapRef_.xid_list(xidList); + for (std::vector<std::string>::iterator itr = xidList.begin(); itr != xidList.end(); itr++) { + std::vector<std::string>::const_iterator pitr = + std::find(preparedTransactionListPtr->begin(), preparedTransactionListPtr->end(), *itr); + if (pitr == preparedTransactionListPtr->end()) { // not found in prepared list + txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(*itr); // tdl will be empty if xid not found + // Unlock any affected enqueues in emap + for (tdl_itr_t i=tdl.begin(); i<tdl.end(); i++) { + if (i->enq_flag_) { // enq op - decrement enqueue count + fileNumberMap_[i->fid_]->journalFilePtr_->decrEnqueuedRecordCount(); + } else if (enqueueMapRef_.is_enqueued(i->drid_, true)) { // deq op - unlock enq record + if (enqueueMapRef_.unlock(i->drid_) < enq_map::EMAP_OK) { // fail + // enq_map::unlock()'s only error is enq_map::EMAP_RID_NOT_FOUND + std::ostringstream oss; + oss << 
std::hex << "_emap.unlock(): drid=0x\"" << i->drid_; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "analyzeJournals"); + } + } + } + } + } + } + prepareRecordList(); + } +} + +std::streamoff RecoveryManager::getEndOffset() const { + return endOffset_; +} + +uint64_t RecoveryManager::getHighestFileNumber() const { + return highestFileNumber_; +} + +uint64_t RecoveryManager::getHighestRecordId() const { + return highestRecordId_; +} + +bool RecoveryManager::isLastFileFull() const { + return lastFileFullFlag_; +} + +bool RecoveryManager::readNextRemainingRecord(void** const dataPtrPtr, + std::size_t& dataSize, + void** const xidPtrPtr, + std::size_t& xidSize, + bool& transient, + bool& external, + data_tok* const dtokp, + bool ignore_pending_txns) { + bool foundRecord = false; + do { + if (recordIdListConstItr_ == recordIdList_.end()) { + return false; + } + if (recordIdListConstItr_->pendingTransaction_ && ignore_pending_txns) { // Pending transaction + ++recordIdListConstItr_; // ignore, go to next record + } else { + foundRecord = true; + } + } while (!foundRecord); + + if (!inFileStream_.is_open() || currentJournalFileItr_->first != recordIdListConstItr_->fileId_) { + if (!getFile(recordIdListConstItr_->fileId_, false)) { + std::ostringstream oss; + oss << "Failed to open file with file-id=" << recordIdListConstItr_->fileId_; + throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord"); + } + } + inFileStream_.seekg(recordIdListConstItr_->fileOffset_, std::ifstream::beg); + if (!inFileStream_.good()) { + std::ostringstream oss; + oss << "Could not find offset 0x" << std::hex << recordIdListConstItr_->fileOffset_ << " in file " << getCurrentFileName(); + throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord"); + } + + ::enq_hdr_t enqueueHeader; + inFileStream_.read((char*)&enqueueHeader, sizeof(::enq_hdr_t)); + if (inFileStream_.gcount() != 
sizeof(::enq_hdr_t)) { + std::ostringstream oss; + oss << "Could not read enqueue header from file " << getCurrentFileName() << " at offset 0x" << std::hex << recordIdListConstItr_->fileOffset_; + throw jexception(jerrno::JERR__FILEIO, oss.str(), "RecoveryManager", "readNextRemainingRecord"); + } + // check flags + transient = ::is_enq_transient(&enqueueHeader); + external = ::is_enq_external(&enqueueHeader); + + // read xid + xidSize = enqueueHeader._xidsize; + *xidPtrPtr = ::malloc(xidSize); + if (*xidPtrPtr == 0) { + std::ostringstream oss; + oss << "xidPtr, size=0x" << std::hex << xidSize; + throw jexception(jerrno::JERR__MALLOC, oss.str(), "RecoveryManager", "readNextRemainingRecord"); + } + readJournalData((char*)*xidPtrPtr, xidSize); + + // read data + dataSize = enqueueHeader._dsize; + *dataPtrPtr = ::malloc(dataSize); + if (*xidPtrPtr == 0) { + std::ostringstream oss; + oss << "dataPtr, size=0x" << std::hex << dataSize; + throw jexception(jerrno::JERR__MALLOC, oss.str(), "RecoveryManager", "readNextRemainingRecord"); + } + readJournalData((char*)*dataPtrPtr, dataSize); + + // Check enqueue record checksum + Checksum checksum; + checksum.addData((const unsigned char*)&enqueueHeader, sizeof(::enq_hdr_t)); + if (xidSize > 0) { + checksum.addData((const unsigned char*)*xidPtrPtr, xidSize); + } + if (dataSize > 0) { + checksum.addData((const unsigned char*)*dataPtrPtr, dataSize); + } + ::rec_tail_t enqueueTail; + readJournalData((char*)&enqueueTail, sizeof(::rec_tail_t)); + uint32_t cs = checksum.getChecksum(); + uint16_t res = ::rec_tail_check(&enqueueTail, &enqueueHeader._rhdr, cs); + if (res != 0) { + std::stringstream oss; + oss << "Bad record tail:" << std::hex; + if (res & ::REC_TAIL_MAGIC_ERR_MASK) { + oss << std::endl << " Magic: expected 0x" << ~enqueueHeader._rhdr._magic << "; found 0x" << enqueueTail._xmagic; + } + if (res & ::REC_TAIL_SERIAL_ERR_MASK) { + oss << std::endl << " Serial: expected 0x" << enqueueHeader._rhdr._serial << "; found 0x" << 
enqueueTail._serial; + } + if (res & ::REC_TAIL_RID_ERR_MASK) { + oss << std::endl << " Record Id: expected 0x" << enqueueHeader._rhdr._rid << "; found 0x" << enqueueTail._rid; + } + if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) { + oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << enqueueTail._checksum; + } + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "RecoveryManager", "readNextRemainingRecord"); // TODO: Don't throw exception, log info + } + + // Set data token + dtokp->set_wstate(data_tok::ENQ); + dtokp->set_rid(enqueueHeader._rhdr._rid); + dtokp->set_dsize(dataSize); + if (xidSize) { + dtokp->set_xid(*xidPtrPtr, xidSize); + } + + ++recordIdListConstItr_; + return true; +} + +void RecoveryManager::recoveryComplete() { + if(inFileStream_.is_open()) { + inFileStream_.close(); + } +} + +void RecoveryManager::setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr, + LinearFileController* lfcPtr) { + if (journalEmptyFlag_) { + if (uninitFileList_.size() > 0) { + // TODO: Handle case if uninitFileList_.size() > 1, but this should not happen in normal operation. Here we assume only one item in the list. 
+ std::string uninitFile = uninitFileList_.back(); + uninitFileList_.pop_back(); + lfcPtr->restoreEmptyFile(uninitFile); + } + } else { + if (initial_fid_ == 0) { + throw jexception(jerrno::JERR_RCVM_NULLFID, "RecoveryManager", "setLinearFileControllerJournals"); + } + for (fileNumberMapConstItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) { + (lfcPtr->*fnPtr)(i->second->journalFilePtr_, i->second->completedDblkCount_, i->first == initial_fid_); + } + } + + std::ostringstream oss; + bool logFlag = !notNeededFilesList_.empty(); + if (logFlag) { + oss << "Files removed from head of journal: prior truncation during recovery:"; + } + while (!notNeededFilesList_.empty()) { + lfcPtr->removeFileToEfp(notNeededFilesList_.back()); + oss << std::endl << " * " << notNeededFilesList_.back(); + notNeededFilesList_.pop_back(); + } + if (logFlag) { + journalLogRef_.log(JournalLog::LOG_NOTICE, queueName_, oss.str()); + } +} + +std::string RecoveryManager::toString(const std::string& jid, const uint16_t indent) const { + std::string indentStr(indent, ' '); + std::ostringstream oss; + oss << std::endl << indentStr << "Journal recovery analysis (jid=\"" << jid << "\"):" << std::endl; + if (journalEmptyFlag_) { + oss << indentStr << "<Journal empty, no journal files found>" << std::endl; + } else { + oss << indentStr << std::setw(7) << "file_id" + << std::setw(43) << "file_name" + << std::setw(12) << "record_cnt" + << std::setw(16) << "fro" + << std::setw(12) << "efp_id" + << std::endl; + oss << indentStr << std::setw(7) << "-------" + << std::setw(43) << "-----------------------------------------" + << std::setw(12) << "----------" + << std::setw(16) << "--------------" + << std::setw(12) << "----------" + << std::endl; + uint32_t totalRecordCount(0UL); + for (fileNumberMapConstItr_t k=fileNumberMap_.begin(); k!=fileNumberMap_.end(); ++k) { + std::string fqFileName = k->second->journalFilePtr_->getFqFileName(); + std::ostringstream fid; + fid << std::hex << "0x" << 
k->first; + std::ostringstream fro; + fro << std::hex << "0x" << k->second->journalFilePtr_->getFirstRecordOffset(); + oss << indentStr << std::setw(7) << fid.str() + << std::setw(43) << fqFileName.substr(fqFileName.rfind('/')+1) + << std::setw(12) << k->second->journalFilePtr_->getEnqueuedRecordCount() + << std::setw(16) << fro.str() + << std::setw(12) << k->second->journalFilePtr_->getEfpIdentity() + << std::endl; + totalRecordCount += k->second->journalFilePtr_->getEnqueuedRecordCount(); + } + oss << indentStr << std::setw(62) << "----------" << std::endl; + oss << indentStr << std::setw(62) << totalRecordCount << std::endl; + oss << indentStr << "First record offset in first file = 0x" << std::hex << firstRecordOffset_ << + std::dec << " (" << (firstRecordOffset_/QLS_DBLK_SIZE_BYTES) << " dblks)" << std::endl; + oss << indentStr << "End offset in last file = 0x" << std::hex << endOffset_ << std::dec << " (" << + (endOffset_/QLS_DBLK_SIZE_BYTES) << " dblks)" << std::endl; + oss << indentStr << "Highest rid found = 0x" << std::hex << highestRecordId_ << std::dec << std::endl; + oss << indentStr << "Last file full = " << (lastFileFullFlag_ ? 
"TRUE" : "FALSE") << std::endl; + } + return oss.str(); +} + +// --- protected functions --- + +void RecoveryManager::analyzeJournalFileHeaders(efpIdentity_t& efpIdentity) { + std::string headerQueueName; + ::file_hdr_t fileHeader; + stringList_t directoryList; + jdir::read_dir(journalDirectory_, directoryList, false, true, false, true); + for (stringListConstItr_t i = directoryList.begin(); i != directoryList.end(); ++i) { + bool hdrOk = readJournalFileHeader(*i, fileHeader, headerQueueName); + bool hdrEmpty = ::is_file_hdr_reset(&fileHeader); + if (!hdrOk) { + std::ostringstream oss; + oss << "Journal file " << (*i) << " is corrupted or invalid"; + journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss.str()); + } else if (hdrEmpty) { + // Read symlink, find efp directory name which is efp size in KiB + // TODO: place this bit into a common function as it is also used in EmptyFilePool.cpp::deleteSymlink() + char buff[1024]; + ssize_t len = ::readlink((*i).c_str(), buff, 1024); + if (len < 0) { + std::ostringstream oss; + oss << "symlink=\"" << (*i) << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__SYMLINK, oss.str(), "RecoveryManager", "analyzeJournalFileHeaders"); + } + // Find second and third '/' from back of string, which contains the EFP directory name + *(::strrchr(buff, '/')) = '\0'; + *(::strrchr(buff, '/')) = '\0'; + int efpDataSize_kib = atoi(::strrchr(buff, '/') + 1); + uninitFileList_.push_back(*i); + efpIdentity.pn_ = fileHeader._efp_partition; + efpIdentity.ds_ = efpDataSize_kib; + } else if (headerQueueName.compare(queueName_) != 0) { + std::ostringstream oss; + oss << "Journal file " << (*i) << " belongs to queue \"" << headerQueueName << "\": ignoring"; + journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss.str()); + } else { + JournalFile* jfp = new JournalFile(*i, fileHeader, queueName_); + std::pair<fileNumberMapItr_t, bool> res = fileNumberMap_.insert( + std::pair<uint64_t, RecoveredFileData_t*>(fileHeader._file_number, 
new RecoveredFileData_t(jfp, 0))); + if (!res.second) { + std::ostringstream oss; + oss << "Journal file " << (*i) << " has fid=0x" << std::hex << jfp->getFileSeqNum() << " which already exists for this journal."; + throw jexception(oss.str()); // TODO: complete this exception + } + if (fileHeader._file_number > highestFileNumber_) { + highestFileNumber_ = fileHeader._file_number; + } + // TODO: Logic weak here for detecting error conditions in journal, specifically when no + // valid files exist, or files from mixed EFPs. Currently last read file header determines + // efpIdentity. + efpIdentity.pn_ = fileHeader._efp_partition; + efpIdentity.ds_ = fileHeader._data_size_kib; + } + } + +//std::cerr << "*** RecoveryManager::analyzeJournalFileHeaders() fileNumberMap_.size()=" << fileNumberMap_.size() << std::endl; // DEBUG + if (fileNumberMap_.empty()) { + journalEmptyFlag_ = true; + } else { + currentJournalFileItr_ = fileNumberMap_.begin(); + } +} + +void RecoveryManager::checkFileStreamOk(bool checkEof) { + if (inFileStream_.fail() || inFileStream_.bad() || checkEof ? 
inFileStream_.eof() : false) { + std::ostringstream oss; + oss << "Stream status: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F"); + if (checkEof) { + oss << " eof=" << (inFileStream_.eof()?"T":"F"); + } + throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "checkFileStreamOk"); + } +} + +void RecoveryManager::checkJournalAlignment(const uint64_t start_fid, const std::streampos recordPosition) { + if (recordPosition % QLS_DBLK_SIZE_BYTES != 0) { + std::ostringstream oss; + oss << "Current read pointer not dblk aligned: recordPosition=0x" << std::hex << recordPosition; + oss << " (dblk alignment offset = 0x" << (recordPosition % QLS_DBLK_SIZE_BYTES); + throw jexception(jerrno::JERR_RCVM_NOTDBLKALIGNED, oss.str(), "RecoveryManager", "checkJournalAlignment"); + } + std::streampos currentPosn = recordPosition; + unsigned sblkOffset = currentPosn % QLS_SBLK_SIZE_BYTES; + if (sblkOffset) + { + std::ostringstream oss1; + oss1 << std::hex << "Bad record alignment found at fid=0x" << start_fid; + oss1 << " offs=0x" << currentPosn << " (likely journal overwrite boundary); " << std::dec; + oss1 << (QLS_SBLK_SIZE_DBLKS - (sblkOffset/QLS_DBLK_SIZE_BYTES)) << " filler record(s) required."; + journalLogRef_.log(JournalLog::LOG_WARN, queueName_, oss1.str()); + + fileNumberMapConstItr_t fnmItr = fileNumberMap_.find(start_fid); + std::ofstream outFileStream(fnmItr->second->journalFilePtr_->getFqFileName().c_str(), std::ios_base::in | std::ios_base::out | std::ios_base::binary); + if (!outFileStream.good()) { + throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "checkJournalAlignment"); + } + outFileStream.seekp(currentPosn); + + // Prepare write buffer containing a single empty record (1 dblk) + void* writeBuffer = std::malloc(QLS_DBLK_SIZE_BYTES); + if (writeBuffer == 0) { + throw jexception(jerrno::JERR__MALLOC, "RecoveryManager", "checkJournalAlignment"); + } + const uint32_t xmagic = 
QLS_EMPTY_MAGIC; + ::memcpy(writeBuffer, (const void*)&xmagic, sizeof(xmagic)); + ::memset((char*)writeBuffer + sizeof(xmagic), QLS_CLEAN_CHAR, QLS_DBLK_SIZE_BYTES - sizeof(xmagic)); + + // Write as many empty records as are needed to get to sblk boundary + while (currentPosn % QLS_SBLK_SIZE_BYTES) { + outFileStream.write((const char*)writeBuffer, QLS_DBLK_SIZE_BYTES); + if (outFileStream.fail()) { + throw jexception(jerrno::JERR_RCVM_WRITE, "RecoveryManager", "checkJournalAlignment"); + } + std::ostringstream oss2; + oss2 << std::hex << "Recover phase write: Wrote filler record: fid=0x" << start_fid; + oss2 << " offs=0x" << currentPosn; + journalLogRef_.log(JournalLog::LOG_NOTICE, queueName_, oss2.str()); + currentPosn = outFileStream.tellp(); + } + outFileStream.close(); + std::free(writeBuffer); + journalLogRef_.log(JournalLog::LOG_INFO, queueName_, "Bad record alignment fixed."); + } + lastRecord(start_fid, currentPosn); +} + +bool RecoveryManager::decodeRecord(jrec& record, + std::size_t& cumulativeSizeRead, + ::rec_hdr_t& headerRecord, + const uint64_t start_fid, + const std::streampos recordOffset) +{ + if (highestRecordId_ == 0) { + highestRecordId_ = headerRecord._rid; + } else if (headerRecord._rid - highestRecordId_ < 0x8000000000000000ULL) { // RFC 1982 comparison for unsigned 64-bit + highestRecordId_ = headerRecord._rid; + } + + bool done = false; + while (!done) { + try { + done = record.decode(headerRecord, &inFileStream_, cumulativeSizeRead, recordOffset); + } + catch (const jexception& e) { + if (e.err_code() == jerrno::JERR_JREC_BADRECTAIL) { + std::ostringstream oss; + oss << jerrno::err_msg(e.err_code()) << e.additional_info(); + journalLogRef_.log(JournalLog::LOG_INFO, queueName_, oss.str()); + } else { + journalLogRef_.log(JournalLog::LOG_INFO, queueName_, e.what()); + } + checkJournalAlignment(start_fid, recordOffset); + return false; + } + if (!done && needNextFile()) { + if (!getNextFile(false)) { + checkJournalAlignment(start_fid, 
recordOffset); + return false; + } + } + } + return true; +} + +std::string RecoveryManager::getCurrentFileName() const { + return currentJournalFileItr_->second->journalFilePtr_->getFqFileName(); +} + +uint64_t RecoveryManager::getCurrentFileNumber() const { + return currentJournalFileItr_->first; +} + +bool RecoveryManager::getFile(const uint64_t fileNumber, bool jumpToFirstRecordOffsetFlag) { + if (inFileStream_.is_open()) { + inFileStream_.close(); +//std::cout << " f=" << getCurrentFileName() << "]" << std::flush; // DEBUG + inFileStream_.clear(); // clear eof flag, req'd for older versions of c++ + } + currentJournalFileItr_ = fileNumberMap_.find(fileNumber); + if (currentJournalFileItr_ == fileNumberMap_.end()) { + return false; + } + inFileStream_.open(getCurrentFileName().c_str(), std::ios_base::in | std::ios_base::binary); + if (!inFileStream_.good()) { + throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "getFile"); + } +//std::cout << " [F=" << getCurrentFileName() << std::flush; // DEBUG + + if (!readFileHeader()) { + return false; + } + std::streamoff foffs = jumpToFirstRecordOffsetFlag ? 
firstRecordOffset_ : QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES; + inFileStream_.seekg(foffs); + return true; +} + +bool RecoveryManager::getNextFile(bool jumpToFirstRecordOffsetFlag) { + if (fileNumberMap_.empty()) { + return false; + } + if (inFileStream_.is_open()) { + inFileStream_.close(); +//std::cout << " .f=" << getCurrentFileName() << "]" << std::flush; // DEBUG + currentJournalFileItr_->second->completedDblkCount_ = efpFileSize_kib_ * 1024 / QLS_DBLK_SIZE_BYTES; + if (++currentJournalFileItr_ == fileNumberMap_.end()) { + return false; + } + inFileStream_.clear(); // clear eof flag, req'd for older versions of c++ + } + inFileStream_.open(getCurrentFileName().c_str(), std::ios_base::in | std::ios_base::binary); + if (!inFileStream_.good()) { + throw jexception(jerrno::JERR__FILEIO, getCurrentFileName(), "RecoveryManager", "getNextFile"); + } +//std::cout << " [.F=" << getCurrentFileName() << std::flush; // DEBUG + + if (!readFileHeader()) { + return false; + } + std::streamoff foffs = jumpToFirstRecordOffsetFlag ? 
firstRecordOffset_ : QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES; + inFileStream_.seekg(foffs); + return true; +} + +bool RecoveryManager::getNextRecordHeader() +{ + std::size_t cum_size_read = 0; + void* xidp = 0; + rec_hdr_t h; + + bool hdr_ok = false; + uint64_t file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum(); + std::streampos file_pos = 0; + if (inFileStream_.is_open()) { + inFileStream_.clear(); + file_pos = inFileStream_.tellg(); + } + if (file_pos == std::streampos(-1)) { + std::ostringstream oss; + oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F"); + oss << " eof=" << (inFileStream_.eof()?"T":"F") << " good=" << (inFileStream_.good()?"T":"F"); + oss << " rdstate=0x" << std::hex << inFileStream_.rdstate() << std::dec; + throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + while (!hdr_ok) { + if (needNextFile()) { + if (!getNextFile(true)) { + lastRecord(file_id, file_pos); + return false; + } + } + file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum(); + file_pos = inFileStream_.tellg(); + if (file_pos == std::streampos(-1)) { + std::ostringstream oss; + oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F"); + oss << " eof=" << (inFileStream_.eof()?"T":"F") << " good=" << (inFileStream_.good()?"T":"F"); + oss << " rdstate=0x" << std::hex << inFileStream_.rdstate() << std::dec; + throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + inFileStream_.read((char*)&h, sizeof(rec_hdr_t)); + if (inFileStream_.gcount() == sizeof(rec_hdr_t)) { + hdr_ok = true; + } else { + if (needNextFile()) { + if (!getNextFile(true)) { + lastRecord(file_id, file_pos); + return false; + } + } + } + } + + uint64_t start_fid = getCurrentFileNumber(); // fid may increment in decode() if record folds over file 
boundary + switch(h._magic) { + case QLS_ENQ_MAGIC: + { +//std::cout << " 0x" << std::hex << file_pos << ".e.0x" << h._rid << std::dec << std::flush; // DEBUG + if (::rec_hdr_check(&h, QLS_ENQ_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) { + checkJournalAlignment(file_id, file_pos); + return false; + } + enq_rec er; + if (!decodeRecord(er, cum_size_read, h, start_fid, file_pos)) { + return false; + } + if (!er.is_transient()) { // Ignore transient msgs + fileNumberMap_[start_fid]->journalFilePtr_->incrEnqueuedRecordCount(); + if (er.xid_size()) { + er.get_xid(&xidp); + if (xidp == 0) { + throw jexception(jerrno::JERR_RCVM_NULLXID, "ENQ", "RecoveryManager", "getNextRecordHeader"); + } + std::string xid((char*)xidp, er.xid_size()); + transactionMapRef_.insert_txn_data(xid, txn_data_t(h._rid, 0, start_fid, file_pos, true, false, false)); + if (transactionMapRef_.set_aio_compl(xid, h._rid) < txn_map::TMAP_OK) { // fail - xid or rid not found + std::ostringstream oss; + oss << std::hex << "_tmap.set_aio_compl: txn_enq xid=\"" << xid << "\" rid=0x" << h._rid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + } else { + if (enqueueMapRef_.insert_pfid(h._rid, start_fid, file_pos) < enq_map::EMAP_OK) { // fail + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. 
+ std::ostringstream oss; + oss << std::hex << "rid=0x" << h._rid << " _pfid=0x" << start_fid; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + } + } + } + break; + case QLS_DEQ_MAGIC: + { +//std::cout << " 0x" << std::hex << file_pos << ".d.0x" << h._rid << std::dec << std::flush; // DEBUG + if (::rec_hdr_check(&h, QLS_DEQ_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) { + checkJournalAlignment(file_id, file_pos); + return false; + } + deq_rec dr; + if (!decodeRecord(dr, cum_size_read, h, start_fid, file_pos)) { + return false; + } + if (dr.xid_size()) { + // If the enqueue is part of a pending txn, it will not yet be in emap + enqueueMapRef_.lock(dr.deq_rid()); // ignore not found error + dr.get_xid(&xidp); + if (xidp == 0) { + throw jexception(jerrno::JERR_RCVM_NULLXID, "DEQ", "RecoveryManager", "getNextRecordHeader"); + } + std::string xid((char*)xidp, dr.xid_size()); + transactionMapRef_.insert_txn_data(xid, txn_data_t(dr.rid(), dr.deq_rid(), start_fid, file_pos, + false, false, dr.is_txn_coml_commit())); + if (transactionMapRef_.set_aio_compl(xid, dr.rid()) < txn_map::TMAP_OK) { // fail - xid or rid not found + std::ostringstream oss; + oss << std::hex << "_tmap.set_aio_compl: txn_deq xid=\"" << xid << "\" rid=0x" << dr.rid(); + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + } else { + uint64_t enq_fid; + if (enqueueMapRef_.get_remove_pfid(dr.deq_rid(), enq_fid, true) == enq_map::EMAP_OK) { // ignore not found error + fileNumberMap_[enq_fid]->journalFilePtr_->decrEnqueuedRecordCount(); + } + } + } + break; + case QLS_TXA_MAGIC: + { +//std::cout << " 0x" << std::hex << file_pos << ".a.0x" << h._rid << std::dec << std::flush; // DEBUG + if (::rec_hdr_check(&h, QLS_TXA_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) { + checkJournalAlignment(file_id, file_pos); + return false; + } + txn_rec ar; + if (!decodeRecord(ar, cum_size_read, h, start_fid, 
file_pos)) { + return false; + } + // Delete this txn from tmap, unlock any locked records in emap + ar.get_xid(&xidp); + if (xidp == 0) { + throw jexception(jerrno::JERR_RCVM_NULLXID, "ABT", "RecoveryManager", "getNextRecordHeader"); + } + std::string xid((char*)xidp, ar.xid_size()); + txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) { + if (itr->enq_flag_) { + fileNumberMap_[itr->fid_]->journalFilePtr_->decrEnqueuedRecordCount(); + } else { + enqueueMapRef_.unlock(itr->drid_); // ignore not found error + } + } + } + break; + case QLS_TXC_MAGIC: + { +//std::cout << " 0x" << std::hex << file_pos << ".c.0x" << h._rid << std::dec << std::flush; // DEBUG + if (::rec_hdr_check(&h, QLS_TXC_MAGIC, QLS_JRNL_VERSION, currentSerial_) != 0) { + checkJournalAlignment(file_id, file_pos); + return false; + } + txn_rec cr; + if (!decodeRecord(cr, cum_size_read, h, start_fid, file_pos)) { + return false; + } + // Delete this txn from tmap, process records into emap + cr.get_xid(&xidp); + if (xidp == 0) { + throw jexception(jerrno::JERR_RCVM_NULLXID, "CMT", "RecoveryManager", "getNextRecordHeader"); + } + std::string xid((char*)xidp, cr.xid_size()); + txn_data_list_t tdl = transactionMapRef_.get_remove_tdata_list(xid); // tdl will be empty if xid not found + for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) { + if (itr->enq_flag_) { // txn enqueue +//std::cout << "[rid=0x" << std::hex << itr->rid_ << std::dec << " fid=" << itr->fid_ << " fpos=0x" << std::hex << itr->foffs_ << "]" << std::dec << std::flush; // DEBUG + if (enqueueMapRef_.insert_pfid(itr->rid_, itr->fid_, itr->foffs_) < enq_map::EMAP_OK) { // fail + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. 
+ std::ostringstream oss; + oss << std::hex << "rid=0x" << itr->rid_ << " _pfid=0x" << itr->fid_; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "RecoveryManager", "getNextRecordHeader"); + } + } else { // txn dequeue + uint64_t enq_fid; + if (enqueueMapRef_.get_remove_pfid(itr->drid_, enq_fid, true) == enq_map::EMAP_OK) // ignore not found error + fileNumberMap_[enq_fid]->journalFilePtr_->decrEnqueuedRecordCount(); + } + } + } + break; + case QLS_EMPTY_MAGIC: + { +//std::cout << ".x" << std::flush; // DEBUG + uint32_t rec_dblks = jrec::size_dblks(sizeof(::rec_hdr_t)); + inFileStream_.ignore(rec_dblks * QLS_DBLK_SIZE_BYTES - sizeof(::rec_hdr_t)); + checkFileStreamOk(false); + if (needNextFile()) { + file_pos += rec_dblks * QLS_DBLK_SIZE_BYTES; + if (!getNextFile(false)) { + lastRecord(start_fid, file_pos); + return false; + } + } + } + break; + case 0: +//std::cout << " 0x" << std::hex << file_pos << ".0" << std::dec << std::endl << std::flush; // DEBUG + checkJournalAlignment(getCurrentFileNumber(), file_pos); + return false; + default: +//std::cout << " 0x" << std::hex << file_pos << ".?" << std::dec << std::endl << std::flush; // DEBUG + // Stop as this is the overwrite boundary. 
+ checkJournalAlignment(getCurrentFileNumber(), file_pos); + return false; + } + return true; +} + +void RecoveryManager::lastRecord(const uint64_t file_id, const std::streamoff endOffset) { + endOffset_ = endOffset; + initial_fid_ = file_id; + fileNumberMap_[file_id]->completedDblkCount_ = endOffset_ / QLS_DBLK_SIZE_BYTES; + + // Remove any files in fileNumberMap_ beyond initial_fid_ + fileNumberMapItr_t unwantedFirstItr = fileNumberMap_.find(file_id); + if (++unwantedFirstItr != fileNumberMap_.end()) { + fileNumberMapItr_t itr = unwantedFirstItr; + notNeededFilesList_.push_back(unwantedFirstItr->second->journalFilePtr_->getFqFileName()); + while (++itr != fileNumberMap_.end()) { + notNeededFilesList_.push_back(itr->second->journalFilePtr_->getFqFileName()); + delete itr->second->journalFilePtr_; + delete itr->second; + } + fileNumberMap_.erase(unwantedFirstItr, fileNumberMap_.end()); + } +} + +bool RecoveryManager::needNextFile() { + if (inFileStream_.is_open()) { + return inFileStream_.eof() || inFileStream_.tellg() >= std::streampos(efpFileSize_kib_ * 1024); + } + return true; +} + +void RecoveryManager::prepareRecordList() { + // Set up recordIdList_ from enqueue map and transaction map + recordIdList_.clear(); + + // Extract records from enqueue list + std::vector<uint64_t> ridList; + enqueueMapRef_.rid_list(ridList); + qpid::linearstore::journal::enq_map::emap_data_struct_t eds; + for (std::vector<uint64_t>::const_iterator i=ridList.begin(); i!=ridList.end(); ++i) { + enqueueMapRef_.get_data(*i, eds); + recordIdList_.push_back(RecoveredRecordData_t(*i, eds._pfid, eds._file_posn, false)); + } + + // Extract records from pending transaction enqueues + std::vector<std::string> xidList; + transactionMapRef_.xid_list(xidList); + for (std::vector<std::string>::const_iterator j=xidList.begin(); j!=xidList.end(); ++j) { + qpid::linearstore::journal::txn_data_list_t tdsl = transactionMapRef_.get_tdata_list(*j); + for (qpid::linearstore::journal::tdl_itr_t 
k=tdsl.begin(); k!=tdsl.end(); ++k) { + if (k->enq_flag_) { + recordIdList_.push_back(RecoveredRecordData_t(k->rid_, k->fid_, k->foffs_, true)); + } + } + } + + std::sort(recordIdList_.begin(), recordIdList_.end(), recordIdListCompare); + recordIdListConstItr_ = recordIdList_.begin(); +} + +void RecoveryManager::readJournalData(char* target, + const std::streamsize readSize) { + std::streamoff bytesRead = 0; + while (bytesRead < readSize) { + std::streampos file_pos = inFileStream_.tellg(); + if (file_pos == std::streampos(-1)) { + std::ostringstream oss; + oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F"); + throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "readJournalData"); + } + inFileStream_.read(target + bytesRead, readSize - bytesRead); + std::streamoff thisReadSize = inFileStream_.gcount(); + if (thisReadSize < readSize) { + if (needNextFile()) { + getNextFile(false); + } + file_pos = inFileStream_.tellg(); + if (file_pos == std::streampos(-1)) { + std::ostringstream oss; + oss << "tellg() failure: fail=" << (inFileStream_.fail()?"T":"F") << " bad=" << (inFileStream_.bad()?"T":"F"); + throw jexception(jerrno::JERR_RCVM_STREAMBAD, oss.str(), "RecoveryManager", "readJournalData"); + } + } + bytesRead += thisReadSize; + } +} + +bool RecoveryManager::readFileHeader() { + file_hdr_t fhdr; + inFileStream_.read((char*)&fhdr, sizeof(fhdr)); + checkFileStreamOk(true); + if (::file_hdr_check(&fhdr, QLS_FILE_MAGIC, QLS_JRNL_VERSION, efpFileSize_kib_, QLS_MAX_QUEUE_NAME_LEN) != 0) { + firstRecordOffset_ = fhdr._fro; + currentSerial_ = fhdr._rhdr._serial; + } else { + inFileStream_.close(); + if (currentJournalFileItr_ == fileNumberMap_.begin()) { + journalEmptyFlag_ = true; + } + return false; + } + return true; +} + +// static private +bool RecoveryManager::readJournalFileHeader(const std::string& journalFileName, + ::file_hdr_t& fileHeaderRef, + std::string& queueName) { + const 
std::size_t headerBlockSize = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_KIB * 1024; + char buffer[headerBlockSize]; + std::ifstream ifs(journalFileName.c_str(), std::ifstream::in | std::ifstream::binary); + if (!ifs.good()) { + std::ostringstream oss; + oss << "File=" << journalFileName; + throw jexception(jerrno::JERR_RCVM_OPENRD, oss.str(), "RecoveryManager", "readJournalFileHeader"); + } + ifs.read(buffer, headerBlockSize); + if (!ifs) { + std::streamsize s = ifs.gcount(); + ifs.close(); + std::ostringstream oss; + oss << "File=" << journalFileName << "; attempted_read_size=" << headerBlockSize << "; actual_read_size=" << s; + throw jexception(jerrno::JERR_RCVM_READ, oss.str(), "RecoveryManager", "readJournalFileHeader"); + } + ifs.close(); + ::memcpy(&fileHeaderRef, buffer, sizeof(::file_hdr_t)); + if (::file_hdr_check(&fileHeaderRef, QLS_FILE_MAGIC, QLS_JRNL_VERSION, 0, QLS_MAX_QUEUE_NAME_LEN)) { + return false; + } + queueName.assign(buffer + sizeof(::file_hdr_t), fileHeaderRef._queue_name_len); + return true; +} + +void RecoveryManager::removeEmptyFiles(EmptyFilePool* emptyFilePoolPtr) { + while (fileNumberMap_.begin()->second->journalFilePtr_->getEnqueuedRecordCount() == 0 && fileNumberMap_.size() > 1) { + RecoveredFileData_t* rfdp = fileNumberMap_.begin()->second; + emptyFilePoolPtr->returnEmptyFileSymlink(rfdp->journalFilePtr_->getFqFileName()); + delete rfdp->journalFilePtr_; + delete rfdp; + fileNumberMap_.erase(fileNumberMap_.begin()->first); + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h new file mode 100644 index 0000000000..55cc6f8329 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.h @@ -0,0 +1,157 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
/**
 * \brief Location of one record that is to be recovered.
 *
 * RecoveryManager::prepareRecordList() creates one of these per entry of the enqueue map and per
 * enqueue of a pending transaction, passing record id, file id, file offset and a pending-transaction
 * flag to the constructor.
 */
struct RecoveredRecordData_t {
    uint64_t recordId_;
    uint64_t fileId_;
    std::streampos fileOffset_;
    bool pendingTransaction_;
    RecoveredRecordData_t(const uint64_t rid, const uint64_t fid, const std::streampos foffs, bool ptxn);
};

/**
 * \brief Per-file recovery data stored (by pointer) in RecoveryManager's file number map.
 *
 * Neither this struct nor the JournalFile it points to is released automatically in the code shown
 * for RecoveryManager: both are deleted explicitly when a map entry is removed.
 */
struct RecoveredFileData_t {
    JournalFile* journalFilePtr_;
    uint32_t completedDblkCount_;
    RecoveredFileData_t(JournalFile* journalFilePtr, const uint32_t completedDblkCount);
};

/// Ordering predicate passed to std::sort() when the record id list is built.
/// NOTE(review): presumably orders by record id - confirm against the definition.
bool recordIdListCompare(RecoveredRecordData_t a, RecoveredRecordData_t b);

/**
 * \brief Analyzes the journal files of one queue and replays their content into the enqueue and
 *        transaction maps so that the journal can be recovered.
 */
class RecoveryManager
{
protected:
    // Types
    typedef std::vector<std::string> stringList_t;
    typedef stringList_t::const_iterator stringListConstItr_t;
    typedef std::map<uint64_t, RecoveredFileData_t*> fileNumberMap_t;
    typedef fileNumberMap_t::iterator fileNumberMapItr_t;
    typedef fileNumberMap_t::const_iterator fileNumberMapConstItr_t;
    typedef std::vector<RecoveredRecordData_t> recordIdList_t;
    typedef recordIdList_t::const_iterator recordIdListConstItr_t;

    // Location and identity
    const std::string journalDirectory_;
    const std::string queueName_;
    enq_map& enqueueMapRef_;
    txn_map& transactionMapRef_;
    JournalLog& journalLogRef_;

    // Initial journal analysis data
    fileNumberMap_t fileNumberMap_;             ///< File number - JournalFilePtr map
    stringList_t notNeededFilesList_;           ///< Files not needed and to be returned to EFP
    stringList_t uninitFileList_;               ///< File name of uninitialized journal files found during header analysis
    bool journalEmptyFlag_;                     ///< Journal data files empty
    std::streamoff firstRecordOffset_;          ///< First record offset in ffid
    std::streamoff endOffset_;                  ///< End offset (first byte past last record)
    uint64_t highestRecordId_;                  ///< Highest rid found
    uint64_t highestFileNumber_;                ///< Highest file number found
    bool lastFileFullFlag_;                     ///< Last file is full
    uint64_t initial_fid_;                      ///< File id where initial write after recovery will occur

    // State for recovery of individual enqueued records
    uint64_t currentSerial_;
    uint32_t efpFileSize_kib_;
    fileNumberMapConstItr_t currentJournalFileItr_;
    std::string currentFileName_;
    std::ifstream inFileStream_;
    recordIdList_t recordIdList_;
    recordIdListConstItr_t recordIdListConstItr_;

public:
    RecoveryManager(const std::string& journalDirectory,
                    const std::string& queuename,
                    enq_map& enqueueMapRef,
                    txn_map& transactionMapRef,
                    JournalLog& journalLogRef);
    virtual ~RecoveryManager();

    void analyzeJournals(const std::vector<std::string>* preparedTransactionListPtr,
                         EmptyFilePoolManager* emptyFilePoolManager,
                         EmptyFilePool** emptyFilePoolPtrPtr);
    std::streamoff getEndOffset() const;
    uint64_t getHighestFileNumber() const;
    uint64_t getHighestRecordId() const;
    bool isLastFileFull() const;
    bool readNextRemainingRecord(void** const dataPtrPtr,
                                 std::size_t& dataSize,
                                 void** const xidPtrPtr,
                                 std::size_t& xidSize,
                                 bool& transient,
                                 bool& external,
                                 data_tok* const dtokp,
                                 bool ignore_pending_txns);
    /// Closes the journal read stream if it is open.
    void recoveryComplete();
    /// Passes the recovered journal files (or the uninitialized file of an empty journal) to \a lfcPtr and
    /// hands the files that are no longer needed to LinearFileController::removeFileToEfp().
    void setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr,
                                         LinearFileController* lfcPtr);
    /// Returns a multi-line, human-readable summary of the journal analysis, each line prefixed with
    /// \a indent spaces.
    std::string toString(const std::string& jid, const uint16_t indent) const;
protected:
    /// Reads the header of every file in the journal directory and fills fileNumberMap_ and
    /// uninitFileList_; \a efpIdentity receives the partition number and data size found.
    void analyzeJournalFileHeaders(efpIdentity_t& efpIdentity);
    /// Throws jexception (JERR_RCVM_STREAMBAD) depending on the state flags of the read stream;
    /// \a checkEof makes the check take the eof flag into account.
    void checkFileStreamOk(bool checkEof);
    /// Pads file \a start_fid with empty filler records up to the next sblk boundary if
    /// \a recordPosition is not sblk-aligned, then calls lastRecord().
    void checkJournalAlignment(const uint64_t start_fid, const std::streampos recordPosition);
    /// Decodes one record from the read stream, moving on to the next file when the record continues
    /// there. Returns false if decoding failed or no further file was available.
    bool decodeRecord(jrec& record,
                      std::size_t& cumulativeSizeRead,
                      ::rec_hdr_t& recordHeader,
                      const uint64_t start_fid,
                      const std::streampos recordOffset);
    /// Fully-qualified name of the file currentJournalFileItr_ refers to.
    std::string getCurrentFileName() const;
    /// File number of the file currentJournalFileItr_ refers to.
    uint64_t getCurrentFileNumber() const;
    /// Opens the file registered under \a fileNumber, reads its header and seeks either to the first
    /// record offset or to the end of the reserved file header area. Returns false if the file number
    /// is unknown or the header is rejected.
    bool getFile(const uint64_t fileNumber, bool jumpToFirstRecordOffsetFlag);
    /// Closes the current file (if open), advances to the next file in fileNumberMap_ and opens it.
    /// Returns false if there is no such file or its header is rejected.
    bool getNextFile(bool jumpToFirstRecordOffsetFlag);
    /// Reads the next record header and processes the record according to its magic (enqueue, dequeue,
    /// transaction abort/commit, empty), updating the enqueue and transaction maps. Returns false once
    /// the end of the journal content has been detected.
    bool getNextRecordHeader();
    /// Stores the end of the valid journal content and removes all files after \a file_id from
    /// fileNumberMap_, queueing their names in notNeededFilesList_.
    void lastRecord(const uint64_t file_id, const std::streamoff endOffset);
    /// True if no file is open, the read stream is at eof, or the read position has reached the EFP
    /// file size.
    bool needNextFile();
    /// Rebuilds recordIdList_ from the enqueue map and from the enqueues of pending transactions, sorts
    /// it with recordIdListCompare() and resets recordIdListConstItr_ to its beginning.
    void prepareRecordList();
    /// Reads the file header from the read stream and takes over first record offset and serial number.
    bool readFileHeader();
    /// Reads \a size bytes into \a target, continuing in the next journal file on a short read.
    void readJournalData(char* target, const std::streamsize size);
    /// Returns leading files without enqueued records to \a emptyFilePoolPtr, keeping at least one file.
    void removeEmptyFiles(EmptyFilePool* emptyFilePoolPtr);

    /// Reads the reserved header block of \a journalFileName, copies the file header into
    /// \a fileHeaderRef and, if file_hdr_check() accepts it, assigns the queue name that follows the
    /// header to \a queueName. Returns false if the header is rejected; throws jexception
    /// (JERR_RCVM_OPENRD / JERR_RCVM_READ) if the file cannot be opened or read.
    static bool readJournalFileHeader(const std::string& journalFileName,
                                      ::file_hdr_t& fileHeaderRef,
                                      std::string& queueName);
};
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_AIO_H +#define QPID_LINEARSTORE_JOURNAL_AIO_H + +#include <libaio.h> +#include <cstring> +#include <stdint.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +typedef iocb aio_cb; +typedef io_event aio_event; + +/** + * \brief This class is a C++ wrapper class for the libaio functions used by the journal. Note that only those + * functions used by the journal are included here. This is not a complete implementation of all libaio functions. + */ +class aio +{ +public: + /* + * \brief Initialize an AIO context. Causes kernel resources to be initialized for + * AIO operations. + * + * \param maxevents The maximum number of events to be handled + * \param ctxp Pointer to context struct to be initialized + */ + static inline int queue_init(int maxevents, io_context_t* ctxp) + { + return ::io_queue_init(maxevents, ctxp); + } + + /* + * \brief Release an AIO context. Causes kernel resources previously initialized to + * be released. + * + * \param ctx AIO context struct to be released + */ + static inline int queue_release(io_context_t ctx) + { + return ::io_queue_release(ctx); + } + + /* + * \brief Submit asynchronous I/O blocks for processing + * + * The io_submit() system call queues nr I/O request blocks for processing in the AIO context ctx. 
+ * The iocbpp argument should be an array of nr AIO control blocks, which will be submitted to context ctx. + * + * \param ctx AIO context + * \param nr Number of AIO operations + * \param aios Array of nr pointers to AIO control blocks, one for each AIO operation + * \return On success, io_submit() returns the number of iocbs submitted (which may be 0 if nr is zero). + * A negative number indicates an error: + * - -EAGAIN Insufficient resources are available to queue any iocbs. + * - -EBADF The file descriptor specified in the first iocb is invalid. + * - -EFAULT One of the data structures points to invalid data. + * - -EINVAL The AIO context specified by ctx_id is invalid. nr is less than 0. The iocb at *iocbpp[0] + * is not properly initialized, or the operation specified is invalid for the file descriptor + * in the iocb. + */ + static inline int submit(io_context_t ctx, long nr, aio_cb* aios[]) + { + return ::io_submit(ctx, nr, aios); + } + + /* + * \brief Get list of completed AIO operations + * + * The io_getevents() system call attempts to read at least min_nr events and up to nr events from the + * completion queue of the AIO context specified by ctx_id. The timeout argument specifies the amount of time + * to wait for events, where a NULL timeout waits until at least min_nr events have been seen. Note that timeout + * is relative. + * + * \param ctx AIO context + * \param min_nr Minimum number of events to return, will wait until min_nr events are accumulated or until timeout + * \param nr Number of events to return + * \param events Pointer to array of aio_event structs, one for each completed event + * \param timeout Time to wait for min_nr events; 0 will cause an indefinite wait for min_nr events + * \return On success, number of events read: 0 if no events are available, or less than min_nr + * if the timeout has elapsed. A negative number indicates an error: + * - -EFAULT Either events or timeout is an invalid pointer. 
+ * - -EINVAL ctx_id is invalid. min_nr is out of range or nr is out of range. + * - -EINTR Interrupted by a signal handler; see signal(7). + */ + static inline int getevents(io_context_t ctx, long min_nr, long nr, aio_event* events, timespec* const timeout) + { + return ::io_getevents(ctx, min_nr, nr, events, timeout); + } + + /** + * \brief This function allows iocbs to be initialized with a pointer that can be re-used. This prepares an + * aio_cb struct for read use. (This is a wrapper for libaio's ::io_prep_pread() function.) + * + * \param aiocbp Pointer to the aio_cb struct to be prepared. + * \param fd File descriptor to be used for read. + * \param buf Pointer to buffer in which read data is to be placed. MUST BE PAGE_ALIGNED. + * \param count Number of bytes to read - buffer must be large enough. + * \param offset Offset within file from which data will be read. + */ + static inline void prep_pread(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset) + { + ::io_prep_pread(aiocbp, fd, buf, count, offset); + } + + /** + * \brief Special version of libaio's io_prep_pread() which preserves the value of the data pointer. This allows + * iocbs to be initialized with a pointer that can be re-used. This prepares a aio_cb struct for read use. + * + * \param aiocbp Pointer to the aio_cb struct to be prepared. + * \param fd File descriptor to be used for read. + * \param buf Pointer to buffer in which read data is to be placed. MUST BE PAGE_ALIGNED. + * \param count Number of bytes to read - buffer must be large enough. + * \param offset Offset within file from which data will be read. 
+ */ + static inline void prep_pread_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset) + { + std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*)); + aiocbp->aio_fildes = fd; + aiocbp->aio_lio_opcode = IO_CMD_PREAD; + aiocbp->aio_reqprio = 0; + aiocbp->u.c.buf = buf; + aiocbp->u.c.nbytes = count; + aiocbp->u.c.offset = offset; + } + + /** + * \brief This function allows iocbs to be initialized with a pointer that can be re-used. This function prepares + * an aio_cb struct for write use. (This is a wrapper for libaio's ::io_prep_pwrite() function.) + * + * \param aiocbp Pointer to the aio_cb struct to be prepared. + * \param fd File descriptor to be used for write. + * \param buf Pointer to buffer in which data to be written is located. MUST BE PAGE_ALIGNED. + * \param count Number of bytes to write. + * \param offset Offset within file to which data will be written. + */ + static inline void prep_pwrite(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset) + { + ::io_prep_pwrite(aiocbp, fd, buf, count, offset); + } + + /** + * \brief Special version of libaio's io_prep_pwrite() which preserves the value of the data pointer. This allows + * iocbs to be initialized with a pointer that can be re-used. This function prepares an aio_cb struct for write + * use. + * + * \param aiocbp Pointer to the aio_cb struct to be prepared. + * \param fd File descriptor to be used for write. + * \param buf Pointer to buffer in which data to be written is located. MUST BE PAGE_ALIGNED. + * \param count Number of bytes to write. + * \param offset Offset within file to which data will be written. 
+ */ + static inline void prep_pwrite_2(aio_cb* aiocbp, int fd, void* buf, std::size_t count, int64_t offset) + { + std::memset((void*) ((char*) aiocbp + sizeof(void*)), 0, sizeof(aio_cb) - sizeof(void*)); + aiocbp->aio_fildes = fd; + aiocbp->aio_lio_opcode = IO_CMD_PWRITE; + aiocbp->aio_reqprio = 0; + aiocbp->u.c.buf = buf; + aiocbp->u.c.nbytes = count; + aiocbp->u.c.offset = offset; + } + + /** + * \brief Function to check the alignment of memory. + * + * \param ptr Pointer to be checked + * \param byte_count Alignment count (or boundary) + * \returns true if ptr is aligned with byte_count, false otherwise + */ + static inline bool is_aligned(const void* ptr, uint64_t byte_count) + { + return ((uintptr_t)(ptr)) % (byte_count) == 0; + } +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_AIO_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h b/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h new file mode 100644 index 0000000000..f21b62617b --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/aio_callback.h @@ -0,0 +1,44 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H +#define QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H + +#include <stdint.h> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +class data_tok; + +/** + * \class aio_callback + * \brief Abstract interface declaring two pure-virtual callbacks, wr_aio_cb() and rd_aio_cb(). + * NOTE(review): the names suggest these are invoked on completion of write and read AIO operations respectively - confirm against the implementing classes. + */ +class aio_callback +{ +public: + /** Virtual destructor so that implementations can be destroyed through an aio_callback pointer. */ + virtual ~aio_callback() {} + /** \param dtokl List of data_tok pointers handed to the callback (presumably the tokens whose writes completed - confirm). */ + virtual void wr_aio_cb(std::vector<data_tok*>& dtokl) = 0; + /** \param pil List of 16-bit unsigned values handed to the callback (presumably page indices - confirm). */ + virtual void rd_aio_cb(std::vector<uint16_t>& pil) = 0; +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_AIO_CALLBACK_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp b/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp new file mode 100644 index 0000000000..3952c403a1 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/data_tok.cpp @@ -0,0 +1,136 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/data_tok.h" + +#include <iomanip> +#include "qpid/linearstore/journal/slock.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +// Static members + +uint64_t data_tok::_cnt = 0; +smutex data_tok::_mutex; + +data_tok::data_tok(): + _wstate(NONE), + _dsize(0), + _dblks_written(0), + _pg_cnt(0), + _fid(0), + _rid(0), + _xid(), + _dequeue_rid(0), + _external_rid(false) +{ + slock s(_mutex); + _icnt = _cnt++; +} + +data_tok::~data_tok() {} + +const char* +data_tok::wstate_str() const +{ + return wstate_str(_wstate); +} + +const char* +data_tok::wstate_str(write_state wstate) +{ + switch (wstate) + { + case NONE: + return "NONE"; + case ENQ_CACHED: + return "ENQ_CACHED"; + case ENQ_PART: + return "ENQ_PART"; + case ENQ_SUBM: + return "ENQ_SUBM"; + case ENQ: + return "ENQ"; + case DEQ_CACHED: + return "DEQ_CACHED"; + case DEQ_PART: + return "DEQ_PART"; + case DEQ_SUBM: + return "DEQ_SUBM"; + case DEQ: + return "DEQ"; + case ABORT_CACHED: + return "ABORT_CACHED"; + case ABORT_PART: + return "ABORT_PART"; + case ABORT_SUBM: + return "ABORT_SUBM"; + case ABORTED: + return "ABORTED"; + case COMMIT_CACHED: + return "COMMIT_CACHED"; + case COMMIT_PART: + return "COMMIT_PART"; + case COMMIT_SUBM: + return "COMMIT_SUBM"; + case COMMITTED: + return "COMMITTED"; + } + // Not using default: forces compiler to ensure all cases are covered. 
+ return "<wstate unknown>"; +} + +void +data_tok::reset() +{ + _wstate = NONE; + _dsize = 0; + _dblks_written = 0; + _pg_cnt = 0; + _fid = 0; + _rid = 0; + _xid.clear(); +} + +// debug aid +std::string +data_tok::status_str() const +{ + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + oss << "dtok id=0x" << _icnt << "; ws=" << wstate_str()/* << "; rs=" << rstate_str()*/; + oss << "; fid=0x" << _fid << "; rid=0x" << _rid << "; xid="; + for (unsigned i=0; i<_xid.size(); i++) + { + if (isprint(_xid[i])) + oss << _xid[i]; + else + oss << "/" << std::setw(2) << (int)((char)_xid[i]); + } + oss << "; drid=0x" << _dequeue_rid << " extrid=" << (_external_rid?"T":"F"); + oss << "; ds=0x" << _dsize << "; dw=0x" << _dblks_written/* << "; dr=0x" << _dblks_read*/; + oss << "; pc=0x" << _pg_cnt; + return oss.str(); +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/data_tok.h b/qpid/cpp/src/qpid/linearstore/journal/data_tok.h new file mode 100644 index 0000000000..67e0ec9683 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/data_tok.h @@ -0,0 +1,133 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_DATA_TOK_H +#define QPID_LINEARSTORE_JOURNAL_DATA_TOK_H + +namespace qpid { +namespace linearstore { +namespace journal { +class data_tok; +}}} + +#include <cassert> +#include "qpid/linearstore/journal/smutex.h" + +namespace qpid { +namespace linearstore { +namespace journal { + + /** + * \class data_tok + * \brief Data block token (data_tok) used to track wstate of a data block through asynchronous + * I/O process + */ + class data_tok + { + public: + // TODO: Fix this, separate write state from operation + // ie: wstate = NONE, CACHED, PART, SUBM, COMPL + // op = ENQUEUE, DEQUEUE, ABORT, COMMIT + enum write_state + { + NONE, ///< Data block not sent to journal + ENQ_CACHED, ///< Data block enqueue written to page cache + ENQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up + ENQ_SUBM, ///< Data block enqueue submitted to AIO + ENQ, ///< Data block enqueue AIO write complete (enqueue complete) + DEQ_CACHED, ///< Data block dequeue written to page cache + DEQ_PART, ///< Data block part-submitted to AIO, waiting for page buffer to free up + DEQ_SUBM, ///< Data block dequeue submitted to AIO + DEQ, ///< Data block dequeue AIO write complete (dequeue complete) + ABORT_CACHED, + ABORT_PART, + ABORT_SUBM, + ABORTED, + COMMIT_CACHED, + COMMIT_PART, + COMMIT_SUBM, + COMMITTED + }; + + protected: + static smutex _mutex; + static uint64_t _cnt; + uint64_t _icnt; + write_state _wstate; ///< Enqueued / dequeued state of data + std::size_t _dsize; ///< Data size in bytes + uint32_t _dblks_written; ///< Data blocks read/written + uint32_t _pg_cnt; ///< Page counter - incr for each page containing part of data + uint64_t _fid; ///< FID containing header of enqueue record + uint64_t _rid; ///< RID of data set by enqueue operation + std::string _xid; ///< XID set by enqueue operation + uint64_t _dequeue_rid; ///< RID of data set by dequeue operation + bool _external_rid; ///< Flag to indicate external 
setting of rid + + public: + data_tok(); + virtual ~data_tok(); + + inline uint64_t id() const { return _icnt; } + inline write_state wstate() const { return _wstate; } + const char* wstate_str() const; + static const char* wstate_str(write_state wstate); + inline bool is_writable() const { return _wstate == NONE || _wstate == ENQ_PART; } + inline bool is_enqueued() const { return _wstate == ENQ; } + inline bool is_readable() const { return _wstate == ENQ; } + inline bool is_dequeueable() const { return _wstate == ENQ || _wstate == DEQ_PART; } + inline void set_wstate(const write_state wstate) { _wstate = wstate; } + inline std::size_t dsize() const { return _dsize; } + inline void set_dsize(std::size_t dsize) { _dsize = dsize; } + + inline uint32_t dblocks_written() const { return _dblks_written; } + inline void incr_dblocks_written(uint32_t dblks_written) + { _dblks_written += dblks_written; } + inline void set_dblocks_written(uint32_t dblks_written) { _dblks_written = dblks_written; } + + inline uint32_t pg_cnt() const { return _pg_cnt; } + inline uint32_t incr_pg_cnt() { return ++_pg_cnt; } + inline uint32_t decr_pg_cnt() { assert(_pg_cnt != 0); return --_pg_cnt; } + + inline uint64_t fid() const { return _fid; } + inline void set_fid(const uint64_t fid) { _fid = fid; } + inline uint64_t rid() const { return _rid; } + inline void set_rid(const uint64_t rid) { _rid = rid; } + inline uint64_t dequeue_rid() const {return _dequeue_rid; } + inline void set_dequeue_rid(const uint64_t rid) { _dequeue_rid = rid; } + inline bool external_rid() const { return _external_rid; } + inline void set_external_rid(const bool external_rid) { _external_rid = external_rid; } + + inline bool has_xid() const { return !_xid.empty(); } + inline const std::string& xid() const { return _xid; } + inline void clear_xid() { _xid.clear(); } + inline void set_xid(const std::string& xid) { _xid.assign(xid); } + inline void set_xid(const void* xidp, const std::size_t xid_len) + { 
_xid.assign((const char*)xidp, xid_len); } + + void reset(); + + // debug aid + std::string status_str() const; + }; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_DATA_TOK_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp new file mode 100644 index 0000000000..90ca27d082 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.cpp @@ -0,0 +1,313 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/deq_rec.h" + +#include <cassert> +#include <cstring> +#include "qpid/linearstore/journal/Checksum.h" +#include "qpid/linearstore/journal/jexception.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +deq_rec::deq_rec(): + _xidp(0), + _xid_buff(0) +{ + ::deq_hdr_init(&_deq_hdr, QLS_DEQ_MAGIC, QLS_JRNL_VERSION, 0, 0, 0, 0, 0); + ::rec_tail_copy(&_deq_tail, &_deq_hdr._rhdr, 0); +} + +deq_rec::~deq_rec() +{ + clean(); +} + +void +deq_rec::reset(const uint64_t serial, const uint64_t rid, const uint64_t drid, const void* const xidp, + const std::size_t xidlen, const bool txn_coml_commit) +{ + _deq_hdr._rhdr._serial = serial; + _deq_hdr._rhdr._rid = rid; + ::set_txn_coml_commit(&_deq_hdr, txn_coml_commit); + _deq_hdr._deq_rid = drid; + _deq_hdr._xidsize = xidlen; + _xidp = xidp; + _xid_buff = 0; + _deq_tail._serial = serial; + _deq_tail._rid = rid; + _deq_tail._checksum = 0UL; +} + +uint32_t +deq_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum) +{ + assert(wptr != 0); + assert(max_size_dblks > 0); + if (_xidp == 0) + assert(_deq_hdr._xidsize == 0); + + std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t wr_cnt = 0; + + if (rec_offs_dblks) // Continuation of split dequeue record (over 2 or more pages) + { + if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required + { + rec_offs -= sizeof(_deq_hdr); + std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0; + std::size_t wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= _deq_hdr._xidsize - wsize2; + checksum.addData((unsigned char*)wptr, wr_cnt); + if (rem) + { + _deq_tail._checksum = checksum.getChecksum(); + wsize = sizeof(_deq_tail) > rec_offs ? 
sizeof(_deq_tail) - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= sizeof(_deq_tail) - wsize2; + } + assert(rem == 0); + assert(rec_offs == 0); + } + else // No further split required + { + rec_offs -= sizeof(_deq_hdr); + std::size_t wsize = _deq_hdr._xidsize > rec_offs ? _deq_hdr._xidsize - rec_offs : 0; + if (wsize) + { + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + checksum.addData((unsigned char*)wptr, wr_cnt); + } + rec_offs -= _deq_hdr._xidsize - wsize; + _deq_tail._checksum = checksum.getChecksum(); + wsize = sizeof(_deq_tail) > rec_offs ? sizeof(_deq_tail) - rec_offs : 0; + if (wsize) + { + std::memcpy((char*)wptr + wr_cnt, (char*)&_deq_tail + rec_offs, wsize); + wr_cnt += wsize; +#ifdef QLS_CLEAN + std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES; + std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + rec_offs -= sizeof(_deq_tail) - wsize; + assert(rec_offs == 0); + } + } + else // Start at beginning of data record + { + // Assumption: the header will always fit into the first dblk + std::memcpy(wptr, (void*)&_deq_hdr, sizeof(_deq_hdr)); + wr_cnt = sizeof(_deq_hdr); + if (size_dblks(rec_size()) > max_size_dblks) // Split required - can only occur with xid + { + std::size_t wsize; + rem -= sizeof(_deq_hdr); + if (rem) + { + wsize = rem >= _deq_hdr._xidsize ? _deq_hdr._xidsize : rem; + std::memcpy((char*)wptr + wr_cnt, _xidp, wsize); + wr_cnt += wsize; + rem -= wsize; + } + checksum.addData((unsigned char*)wptr, wr_cnt); + if (rem) + { + _deq_tail._checksum = checksum.getChecksum(); + wsize = rem >= sizeof(_deq_tail) ? 
sizeof(_deq_tail) : rem; + std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, wsize); + wr_cnt += wsize; + rem -= wsize; + } + assert(rem == 0); + } + else // No split required + { + if (_deq_hdr._xidsize) + { + std::memcpy((char*)wptr + wr_cnt, _xidp, _deq_hdr._xidsize); + wr_cnt += _deq_hdr._xidsize; + checksum.addData((unsigned char*)wptr, wr_cnt); + _deq_tail._checksum = checksum.getChecksum(); + std::memcpy((char*)wptr + wr_cnt, (void*)&_deq_tail, sizeof(_deq_tail)); + wr_cnt += sizeof(_deq_tail); + } +#ifdef QLS_CLEAN + std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES; + std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + } + return size_dblks(wr_cnt); +} + +bool +deq_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start) +{ + if (rec_offs == 0) + { + ::rec_hdr_copy(&_deq_hdr._rhdr, &h); + ifsp->read((char*)&_deq_hdr._deq_rid, sizeof(_deq_hdr._deq_rid)); + ifsp->read((char*)&_deq_hdr._xidsize, sizeof(_deq_hdr._xidsize)); + rec_offs = sizeof(::deq_hdr_t); + // Read header, allocate (if req'd) for xid + if (_deq_hdr._xidsize) + { + _xid_buff = std::malloc(_deq_hdr._xidsize); + MALLOC_CHK(_xid_buff, "_buff", "enq_rec", "rcv_decode"); + } + } + if (rec_offs < sizeof(_deq_hdr) + _deq_hdr._xidsize) + { + // Read xid (or continue reading xid) + std::size_t offs = rec_offs - sizeof(_deq_hdr); + ifsp->read((char*)_xid_buff + offs, _deq_hdr._xidsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _deq_hdr._xidsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + if (rec_offs < sizeof(_deq_hdr) + + (_deq_hdr._xidsize ? 
_deq_hdr._xidsize + sizeof(rec_tail_t) : 0)) + { + // Read tail (or continue reading tail) + std::size_t offs = rec_offs - sizeof(_deq_hdr) - _deq_hdr._xidsize; + ifsp->read((char*)&_deq_tail + offs, sizeof(rec_tail_t) - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < sizeof(rec_tail_t) - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + check_rec_tail(rec_start); + } + ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size()); + assert(!ifsp->fail() && !ifsp->bad()); + return true; +} + +std::size_t +deq_rec::get_xid(void** const xidpp) +{ + if (!_xid_buff) + { + *xidpp = 0; + return 0; + } + *xidpp = _xid_buff; + return _deq_hdr._xidsize; +} + +std::string& +deq_rec::str(std::string& str) const +{ + std::ostringstream oss; + oss << "deq_rec: m=" << _deq_hdr._rhdr._magic; + oss << " v=" << (int)_deq_hdr._rhdr._version; + oss << " rid=" << _deq_hdr._rhdr._rid; + oss << " drid=" << _deq_hdr._deq_rid; + if (_xidp) + oss << " xid=\"" << _xidp << "\""; + str.append(oss.str()); + return str; +} + +std::size_t +deq_rec::xid_size() const +{ + return _deq_hdr._xidsize; +} + +std::size_t +deq_rec::rec_size() const +{ + return sizeof(deq_hdr_t) + (_deq_hdr._xidsize ? 
_deq_hdr._xidsize + sizeof(rec_tail_t) : 0); +} + +void +deq_rec::check_rec_tail(const std::streampos rec_start) const { + Checksum checksum; + checksum.addData((const unsigned char*)&_deq_hdr, sizeof(::deq_hdr_t)); + if (_deq_hdr._xidsize > 0) { + checksum.addData((const unsigned char*)_xid_buff, _deq_hdr._xidsize); + } + uint32_t cs = checksum.getChecksum(); + uint16_t res = ::rec_tail_check(&_deq_tail, &_deq_hdr._rhdr, cs); + if (res != 0) { + std::stringstream oss; + oss << std::endl << " Record offset: 0x" << std::hex << rec_start; + if (res & ::REC_TAIL_MAGIC_ERR_MASK) { + oss << std::endl << " Magic: expected 0x" << ~_deq_hdr._rhdr._magic << "; found 0x" << _deq_tail._xmagic; + } + if (res & ::REC_TAIL_SERIAL_ERR_MASK) { + oss << std::endl << " Serial: expected 0x" << _deq_hdr._rhdr._serial << "; found 0x" << _deq_tail._serial; + } + if (res & ::REC_TAIL_RID_ERR_MASK) { + oss << std::endl << " Record Id: expected 0x" << _deq_hdr._rhdr._rid << "; found 0x" << _deq_tail._rid; + } + if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) { + oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _deq_tail._checksum; + } + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "deq_rec", "check_rec_tail"); + } +} + +void +deq_rec::clean() +{ + if (_xid_buff) { + std::free(_xid_buff); + _xid_buff = 0; + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h new file mode 100644 index 0000000000..9f55032e76 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/deq_rec.h @@ -0,0 +1,70 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H +#define QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H + +#include "qpid/linearstore/journal/jrec.h" +#include "qpid/linearstore/journal/utils/deq_hdr.h" +#include "qpid/linearstore/journal/utils/rec_tail.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +/** +* \class deq_rec +* \brief Class to handle a single journal dequeue record. A record consists of a dequeue header and, only when an xid is present, the xid bytes followed by a record tail. +*/ +class deq_rec : public jrec +{ +private: + ::deq_hdr_t _deq_hdr; ///< Local instance of dequeue header struct + const void* _xidp; ///< xid pointer for encoding (writing to disk) + void* _xid_buff; ///< Pointer to buffer to receive xid read from disk + ::rec_tail_t _deq_tail; ///< Local instance of dequeue tail struct, only encoded if XID is present + +public: + deq_rec(); + virtual ~deq_rec(); + + /** Prepare the record for encoding; xidp is stored as a pointer, not copied. */ + void reset(const uint64_t serial, const uint64_t rid, const uint64_t drid, const void* const xidp, + const std::size_t xidlen, const bool txn_coml_commit); + /** Write the record (or the next part of a split record) to wptr; returns the number of dblks written. */ + uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum); + /** Read the record from ifsp; returns false if the stream ended before the record was complete. */ + bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start); + + inline bool is_txn_coml_commit() const { return ::is_txn_coml_commit(&_deq_hdr); } + inline uint64_t rid() const { return _deq_hdr._rhdr._rid; } + inline uint64_t deq_rid() const { return _deq_hdr._deq_rid; } + /** Sets *xidpp to the xid buffer filled by decode() (0 if none) and returns the xid size. */ + std::size_t get_xid(void** const xidpp); + std::string& str(std::string& str) const; + inline std::size_t data_size() const { return 0; } // This record never carries data + std::size_t 
xid_size() const; + /** Size in bytes of header, plus xid and tail when an xid is present. */ + std::size_t rec_size() const; + /** Throws jexception if the decoded tail does not match the header and checksum. */ + void check_rec_tail(const std::streampos rec_start) const; + +private: + /** Frees the xid buffer allocated by decode(), if any. */ + virtual void clean(); +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_DEQ_REQ_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp b/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp new file mode 100644 index 0000000000..4eaaa64992 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/enq_map.cpp @@ -0,0 +1,181 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/enq_map.h" + +#include "qpid/linearstore/journal/slock.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +// static return/error codes +int16_t enq_map::EMAP_DUP_RID = -3; +int16_t enq_map::EMAP_LOCKED = -2; +int16_t enq_map::EMAP_RID_NOT_FOUND = -1; +int16_t enq_map::EMAP_OK = 0; +int16_t enq_map::EMAP_FALSE = 0; +int16_t enq_map::EMAP_TRUE = 1; + +enq_map::enq_map(): + _map(){} + +enq_map::~enq_map() {} + + +short +enq_map::insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn) +{ + return insert_pfid(rid, pfid, file_posn, false); +} + +short +enq_map::insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn, const bool locked) +{ + std::pair<emap_itr, bool> ret; + emap_data_struct_t rec(pfid, file_posn, locked); + { + slock s(_mutex); + ret = _map.insert(emap_param(rid, rec)); + } + if (ret.second == false) + return EMAP_DUP_RID; + return EMAP_OK; +} + +short +enq_map::get_pfid(const uint64_t rid, uint64_t& pfid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + if (itr->second._lock) + return EMAP_LOCKED; + pfid = itr->second._pfid; + return EMAP_OK; +} + +short +enq_map::get_remove_pfid(const uint64_t rid, uint64_t& pfid, const bool txn_flag) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + if (itr->second._lock && !txn_flag) // locked, but not a commit/abort + return EMAP_LOCKED; + pfid = itr->second._pfid; + _map.erase(itr); + return EMAP_OK; +} + +short +enq_map::get_file_posn(const uint64_t rid, std::streampos& file_posn) { + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + if (itr->second._lock) + return EMAP_LOCKED; + file_posn = itr->second._file_posn; + return EMAP_OK; +} + +short 
+enq_map::get_data(const uint64_t rid, emap_data_struct_t& eds) { + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + eds._pfid = itr->second._pfid; + eds._file_posn = itr->second._file_posn; + eds._lock = itr->second._lock; + return EMAP_OK; +} + +bool +enq_map::is_enqueued(const uint64_t rid, bool ignore_lock) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return false; + if (!ignore_lock && itr->second._lock) // locked + return false; + return true; +} + +short +enq_map::lock(const uint64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + itr->second._lock = true; + return EMAP_OK; +} + +short +enq_map::unlock(const uint64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + itr->second._lock = false; + return EMAP_OK; +} + +short +enq_map::is_locked(const uint64_t rid) +{ + slock s(_mutex); + emap_itr itr = _map.find(rid); + if (itr == _map.end()) // not found in map + return EMAP_RID_NOT_FOUND; + return itr->second._lock ? 
EMAP_TRUE : EMAP_FALSE; +} + +void +enq_map::rid_list(std::vector<uint64_t>& rv) +{ + rv.clear(); + { + slock s(_mutex); + for (emap_itr itr = _map.begin(); itr != _map.end(); itr++) + rv.push_back(itr->first); + } +} + +void +enq_map::pfid_list(std::vector<uint64_t>& fv) +{ + fv.clear(); + { + slock s(_mutex); + for (emap_itr itr = _map.begin(); itr != _map.end(); itr++) + fv.push_back(itr->second._pfid); + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_map.h b/qpid/cpp/src/qpid/linearstore/journal/enq_map.h new file mode 100644 index 0000000000..912a583ab9 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/enq_map.h @@ -0,0 +1,101 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H +#define QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H + +#include "qpid/linearstore/journal/smutex.h" +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +/** +* \class enq_map +* \brief Class for storing the physical file id (pfid) and a transaction locked flag for each enqueued +* data block using the record id (rid) as a key. 
This is the primary mechanism for
+*        determining the enqueue low water mark: if a pfid exists in this map, then there is
+*        at least one still-enqueued record in that file. (The transaction map must also be
+*        clear, however.)
+*
+* Map rids against pfid and lock status. As records are enqueued, they are added to this
+* map, and as they are dequeued, they are removed. An enqueue is locked when a transactional
+* dequeue is pending that has been neither committed nor aborted.
+* <pre>
+*   key      data
+*
+*   rid1 --- [ pfid, txn_lock ]
+*   rid2 --- [ pfid, txn_lock ]
+*   rid3 --- [ pfid, txn_lock ]
+*   ...
+* </pre>
+*/
+class enq_map
+{
+public:
+    // return/error codes (defined in enq_map.cpp: DUP_RID=-3, LOCKED=-2, RID_NOT_FOUND=-1,
+    // OK=0, FALSE=0, TRUE=1)
+    static short EMAP_DUP_RID;
+    static short EMAP_LOCKED;
+    static short EMAP_RID_NOT_FOUND;
+    static short EMAP_OK;
+    static short EMAP_FALSE;
+    static short EMAP_TRUE;
+
+    // Value stored in the map for each rid.
+    typedef struct emap_data_struct_t {
+        uint64_t        _pfid;      // physical file id holding the enqueue record
+        std::streampos  _file_posn; // position stored for the record
+        bool            _lock;      // true while the entry is locked
+        emap_data_struct_t() : _pfid(0), _file_posn(0), _lock(false) {}
+        emap_data_struct_t(const uint64_t pfid, const std::streampos file_posn, const bool lock) : _pfid(pfid), _file_posn(file_posn), _lock(lock) {}
+    } emqp_data_struct_t; // NOTE(review): alias is spelled "emqp", presumably a typo for "emap" - confirm nothing relies on it
+    typedef std::pair<uint64_t, emap_data_struct_t> emap_param; // (rid, data) pair used for insertion
+    typedef std::map<uint64_t, emap_data_struct_t> emap;        // rid -> data
+    typedef emap::iterator emap_itr;
+
+private:
+    emap _map;      // the rid -> data map itself
+    smutex _mutex;  // taken by the out-of-line member functions around each map access
+
+public:
+    enq_map();
+    virtual ~enq_map();
+
+    short insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn); // 0=ok; -3=duplicate rid;
+    short insert_pfid(const uint64_t rid, const uint64_t pfid, const std::streampos file_posn, const bool locked); // 0=ok; -3=duplicate rid;
+    short get_pfid(const uint64_t rid, uint64_t& pfid); // 0=ok (pfid written to out-param); -1=rid not found; -2=locked
+    short get_remove_pfid(const uint64_t rid, uint64_t& pfid, const bool txn_flag = false); // 0=ok (pfid written to out-param, entry erased); -1=rid not found; -2=locked
+    short get_file_posn(const uint64_t rid, std::streampos& file_posn); // 0=ok (file_posn written to out-param); -1=rid not found; -2=locked
+    short get_data(const uint64_t rid, emap_data_struct_t& eds); // 0=ok (eds filled, even if locked); -1=rid not found
+    bool is_enqueued(const uint64_t rid, bool ignore_lock = false); // true if rid present (and unlocked, unless ignore_lock)
+    short lock(const uint64_t rid); // 0=ok; -1=rid not found
+    short unlock(const uint64_t rid); // 0=ok; -1=rid not found
+    short is_locked(const uint64_t rid); // 1=true; 0=false; -1=rid not found
+    // NOTE(review): clear(), empty() and size() touch _map without taking _mutex, unlike the
+    // out-of-line members - confirm that callers provide their own synchronization.
+    inline void clear() { _map.clear(); }
+    inline bool empty() const { return _map.empty(); }
+    inline uint32_t size() const { return uint32_t(_map.size()); }
+    void rid_list(std::vector<uint64_t>& rv);  // rv is cleared, then filled with every rid in the map
+    void pfid_list(std::vector<uint64_t>& fv); // fv is cleared, then filled with the pfid of every entry
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENQ_MAP_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp
new file mode 100644
index 0000000000..0fecd90cbf
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.cpp
@@ -0,0 +1,397 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/linearstore/journal/enq_rec.h"
+
+#include <cassert>
+#include <cstring>
+#include "qpid/linearstore/journal/Checksum.h"
+#include "qpid/linearstore/journal/jexception.h"
+
+namespace qpid {
+namespace linearstore {
+namespace journal {
+
+// Construct an empty enqueue record: no xid/data pointers, no decode buffers; the header is
+// initialised with the enqueue magic and journal version and the tail is derived from it.
+enq_rec::enq_rec():
+        jrec(), // superclass
+        _xidp(0),
+        _data(0),
+        _xid_buff(0),
+        _data_buff(0)
+{
+    ::enq_hdr_init(&_enq_hdr, QLS_ENQ_MAGIC, QLS_JRNL_VERSION, 0, 0, 0, 0, false);
+    ::rec_tail_copy(&_enq_tail, &_enq_hdr._rhdr, 0);
+}
+
+// Releases any buffers allocated by decode().
+enq_rec::~enq_rec()
+{
+    clean();
+}
+
+// Prepare this record for encoding.
+//   serial, rid       - copied into both the header and the tail
+//   dbuf, dlen        - data to write and its size (the pointer is stored, not copied)
+//   xidp, xidlen      - xid to write and its size (the pointer is stored, not copied)
+//   transient         - sets the transient flag in the header
+//   external          - sets the external flag in the header
+// Only pointers are kept, so dbuf and xidp must stay valid until encode() has finished.
+void
+enq_rec::reset(const uint64_t serial, const uint64_t rid, const void* const dbuf, const std::size_t dlen,
+        const void* const xidp, const std::size_t xidlen, const bool transient, const bool external)
+{
+    _enq_hdr._rhdr._serial = serial;
+    _enq_hdr._rhdr._rid = rid;
+    ::set_enq_transient(&_enq_hdr, transient);
+    ::set_enq_external(&_enq_hdr, external);
+    _enq_hdr._xidsize = xidlen;
+    _enq_hdr._dsize = dlen;
+    _xidp = xidp;
+    _data = dbuf;
+    _enq_tail._serial = serial;
+    _enq_tail._rid = rid;
+}
+
+// Serialise this record (header, xid, data unless external, tail) into wptr.
+//   wptr            - destination buffer; must be non-null
+//   rec_offs_dblks  - number of dblks of this record already written by earlier calls
+//                     (0 = start of record)
+//   max_size_dblks  - space available at wptr, in dblks; must be > 0
+//   checksum        - running checksum; fed with the header/xid/data bytes written here and
+//                     read back to fill the tail's checksum field
+// Returns size_dblks(wr_cnt), where wr_cnt is the number of bytes written by this call
+// (size_dblks is defined elsewhere - presumably rounds up to whole dblks; confirm).
+// A record that does not fit in max_size_dblks is written in parts over several calls.
+uint32_t
+enq_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum)
+{
+    assert(wptr != 0);
+    assert(max_size_dblks > 0);
+    if (_xidp == 0)
+        assert(_enq_hdr._xidsize == 0);
+
+    std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES; // bytes of the record already written
+    std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES;      // bytes still free at wptr
+    std::size_t wr_cnt = 0;                                      // bytes written by this call
+    if (rec_offs_dblks) // Continuation of split data record (over 2 or more pages)
+    {
+        if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required
+        {
+            // rec_offs is successively rebased: after each section (header, xid, data, tail)
+            // it becomes the offset into the following section.
+            rec_offs -= sizeof(_enq_hdr);
+            std::size_t wsize = _enq_hdr._xidsize > rec_offs ? _enq_hdr._xidsize - rec_offs : 0;
+            std::size_t wsize2 = wsize; // unclipped remaining size of the section, used for rebasing
+            if (wsize)
+            {
+                if (wsize > rem)
+                    wsize = rem;
+                std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize);
+                wr_cnt = wsize;
+                rem -= wsize;
+            }
+            rec_offs -= _enq_hdr._xidsize - wsize2;
+            if (rem && !::is_enq_external(&_enq_hdr))
+            {
+                wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0;
+                wsize2 = wsize;
+                if (wsize)
+                {
+                    if (wsize > rem)
+                        wsize = rem;
+                    std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, wsize);
+                    wr_cnt += wsize;
+                    rem -= wsize;
+                }
+                rec_offs -= _enq_hdr._dsize - wsize2;
+            }
+            // Checksum covers the xid/data bytes written so far, never the tail itself
+            checksum.addData((unsigned char*)wptr, wr_cnt);
+            if (rem)
+            {
+                _enq_tail._checksum = checksum.getChecksum();
+                wsize = sizeof(_enq_tail) > rec_offs ? sizeof(_enq_tail) - rec_offs : 0;
+                wsize2 = wsize;
+                if (wsize)
+                {
+                    if (wsize > rem)
+                        wsize = rem;
+                    std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize);
+                    wr_cnt += wsize;
+                    rem -= wsize;
+                }
+                rec_offs -= sizeof(_enq_tail) - wsize2;
+            }
+            assert(rem == 0);
+            assert(rec_offs == 0);
+        }
+        else // No further split required
+        {
+            rec_offs -= sizeof(_enq_hdr);
+            std::size_t xid_wsize = _enq_hdr._xidsize > rec_offs ? _enq_hdr._xidsize - rec_offs : 0;
+            if (xid_wsize)
+            {
+                std::memcpy(wptr, (const char*)_xidp + rec_offs, xid_wsize);
+                wr_cnt += xid_wsize;
+            }
+            rec_offs -= _enq_hdr._xidsize - xid_wsize;
+            // NOTE(review): data_wsize is computed (and used to rebase rec_offs below) even for
+            // external records, whose data is not part of the on-disk record - confirm that the
+            // tail offset is still correct for a split external record.
+            std::size_t data_wsize = _enq_hdr._dsize > rec_offs ? _enq_hdr._dsize - rec_offs : 0;
+            if (data_wsize && !::is_enq_external(&_enq_hdr))
+            {
+                std::memcpy((char*)wptr + wr_cnt, (const char*)_data + rec_offs, data_wsize);
+                wr_cnt += data_wsize;
+            }
+            rec_offs -= _enq_hdr._dsize - data_wsize;
+            if (xid_wsize || data_wsize) {
+                checksum.addData((unsigned char*)wptr, wr_cnt);
+            }
+            _enq_tail._checksum = checksum.getChecksum();
+            std::size_t wsize = sizeof(_enq_tail) > rec_offs ? sizeof(_enq_tail) - rec_offs : 0;
+            if (wsize)
+            {
+                std::memcpy((char*)wptr + wr_cnt, (char*)&_enq_tail + rec_offs, wsize);
+                wr_cnt += wsize;
+#ifdef QLS_CLEAN
+                // Fill the unused remainder of the last dblk with QLS_CLEAN_CHAR.
+                // This rec_offs deliberately shadows the outer one: it is the original byte offset.
+                std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES;
+                std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES;
+                std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+            }
+            rec_offs -= sizeof(_enq_tail) - wsize;
+            assert(rec_offs == 0);
+        }
+    }
+    else // Start at beginning of data record
+    {
+        // Assumption: the header will always fit into the first dblk
+        std::memcpy(wptr, (void*)&_enq_hdr, sizeof(_enq_hdr));
+        wr_cnt = sizeof(_enq_hdr);
+        if (size_dblks(rec_size()) > max_size_dblks) // Split required
+        {
+            std::size_t wsize;
+            rem -= sizeof(_enq_hdr);
+            if (rem)
+            {
+                wsize = rem >= _enq_hdr._xidsize ? _enq_hdr._xidsize : rem;
+                std::memcpy((char*)wptr + wr_cnt, _xidp, wsize);
+                wr_cnt += wsize;
+                rem -= wsize;
+            }
+            if (rem && !::is_enq_external(&_enq_hdr))
+            {
+                wsize = rem >= _enq_hdr._dsize ? _enq_hdr._dsize : rem;
+                std::memcpy((char*)wptr + wr_cnt, _data, wsize);
+                wr_cnt += wsize;
+                rem -= wsize;
+            }
+            // Checksum covers header + xid + data written so far, never the tail itself
+            checksum.addData((unsigned char*)wptr, wr_cnt);
+            if (rem)
+            {
+                _enq_tail._checksum = checksum.getChecksum();
+                wsize = rem >= sizeof(_enq_tail) ? sizeof(_enq_tail) : rem;
+                std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, wsize);
+                wr_cnt += wsize;
+                rem -= wsize;
+            }
+            assert(rem == 0);
+        }
+        else // No split required
+        {
+            if (_enq_hdr._xidsize)
+            {
+                std::memcpy((char*)wptr + wr_cnt, _xidp, _enq_hdr._xidsize);
+                wr_cnt += _enq_hdr._xidsize;
+            }
+            if (!::is_enq_external(&_enq_hdr))
+            {
+                std::memcpy((char*)wptr + wr_cnt, _data, _enq_hdr._dsize);
+                wr_cnt += _enq_hdr._dsize;
+            }
+            checksum.addData((unsigned char*)wptr, wr_cnt);
+            _enq_tail._checksum = checksum.getChecksum();
+            std::memcpy((char*)wptr + wr_cnt, (void*)&_enq_tail, sizeof(_enq_tail));
+            wr_cnt += sizeof(_enq_tail);
+#ifdef QLS_CLEAN
+            // Fill the unused remainder of the last dblk with QLS_CLEAN_CHAR
+            std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES;
+            std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt);
+#endif
+        }
+    }
+    return size_dblks(wr_cnt);
+}
+
+// Read this record from a journal file stream; may be called repeatedly to resume a
+// record that was cut short by end-of-file.
+//   h          - record header already read by the caller; copied into this record
+//   ifsp       - stream positioned just after the part of the record read so far
+//   rec_offs   - in/out: bytes of the record consumed so far (0 on the first call);
+//                advanced by the number of bytes read here
+//   rec_start  - stream position of the record start, used only in error reports
+// Returns true when the whole record (including dblk padding) has been read and the tail
+// has been checked; false when the stream ran out part-way through, in which case the
+// stream's failbit is cleared and rec_offs records how far reading got.
+// check_rec_tail() throws if the tail does not match the header/checksum.
+bool
+enq_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start)
+{
+    if (rec_offs == 0)
+    {
+        // Read header, allocate (if req'd) for xid
+        ::rec_hdr_copy(&_enq_hdr._rhdr, &h);
+        ifsp->read((char*)&_enq_hdr._xidsize, sizeof(_enq_hdr._xidsize));
+        ifsp->read((char*)&_enq_hdr._dsize, sizeof(_enq_hdr._dsize));
+        rec_offs = sizeof(::enq_hdr_t);
+        // NOTE(review): buffers from a previous decode on the same object are not freed before
+        // these allocations - confirm each enq_rec instance decodes at most one record.
+        if (_enq_hdr._xidsize > 0)
+        {
+            _xid_buff = std::malloc(_enq_hdr._xidsize);
+            MALLOC_CHK(_xid_buff, "_xid_buff", "enq_rec", "decode");
+        }
+        if (_enq_hdr._dsize > 0)
+        {
+            _data_buff = std::malloc(_enq_hdr._dsize);
+            MALLOC_CHK(_data_buff, "_data_buff", "enq_rec", "decode")
+        }
+    }
+    if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize)
+    {
+        // Read xid (or continue reading xid)
+        std::size_t offs = rec_offs - sizeof(_enq_hdr);
+        ifsp->read((char*)_xid_buff + offs, _enq_hdr._xidsize - offs);
+        std::size_t size_read = ifsp->gcount();
+        rec_offs += size_read;
+        if (size_read < _enq_hdr._xidsize - offs)
+        {
+            assert(ifsp->eof());
+            // As we may have read past eof, turn off fail bit
+            ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+            assert(!ifsp->fail() && !ifsp->bad());
+            return false;
+        }
+    }
+    if (!::is_enq_external(&_enq_hdr))
+    {
+        if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize + _enq_hdr._dsize)
+        {
+            // Read data (or continue reading data)
+            std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize;
+            ifsp->read((char*)_data_buff + offs, _enq_hdr._dsize - offs);
+            std::size_t size_read = ifsp->gcount();
+            rec_offs += size_read;
+            if (size_read < _enq_hdr._dsize - offs)
+            {
+                assert(ifsp->eof());
+                // As we may have read past eof, turn off fail bit
+                ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+                assert(!ifsp->fail() && !ifsp->bad());
+                return false;
+            }
+        }
+    }
+    if (rec_offs < sizeof(_enq_hdr) + _enq_hdr._xidsize +
+            (::is_enq_external(&_enq_hdr) ? 0 : _enq_hdr._dsize) + sizeof(rec_tail_t))
+    {
+        // Read tail (or continue reading tail)
+        std::size_t offs = rec_offs - sizeof(_enq_hdr) - _enq_hdr._xidsize;
+        if (!::is_enq_external(&_enq_hdr))
+            offs -= _enq_hdr._dsize;
+        ifsp->read((char*)&_enq_tail + offs, sizeof(rec_tail_t) - offs);
+        std::size_t size_read = ifsp->gcount();
+        rec_offs += size_read;
+        if (size_read < sizeof(rec_tail_t) - offs)
+        {
+            assert(ifsp->eof());
+            // As we may have read past eof, turn off fail bit
+            ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit));
+            assert(!ifsp->fail() && !ifsp->bad());
+            return false;
+        }
+        check_rec_tail(rec_start);
+    }
+    // Skip the padding between the end of the record and the next dblk boundary
+    ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size());
+    assert(!ifsp->fail() && !ifsp->bad());
+    return true;
+}
+
+// Return the xid read by decode().
+//   xidpp - receives a pointer to the internal xid buffer (owned by this record), or 0 if
+//           there is no xid
+// Returns the xid size in bytes, or 0 if there is no xid.
+std::size_t
+enq_rec::get_xid(void** const xidpp)
+{
+    if (!_xid_buff || !_enq_hdr._xidsize) {
+        *xidpp = 0;
+        return 0;
+    }
+    *xidpp = _xid_buff;
+    return _enq_hdr._xidsize;
+}
+
+// Return the data read by decode().
+//   datapp - receives a pointer to the internal data buffer (owned by this record); 0 if no
+//            buffer was allocated or if the record is external
+// Returns 0 if no buffer was allocated, otherwise the data size from the header - for an
+// external record this is non-zero even though *datapp is 0.
+std::size_t
+enq_rec::get_data(void** const datapp)
+{
+    if (!_data_buff) {
+        *datapp = 0;
+        return 0;
+    }
+    if (::is_enq_external(&_enq_hdr))
+        *datapp = 0;
+    else
+        *datapp = _data_buff;
+    return _enq_hdr._dsize;
+}
+
+std::string& +enq_rec::str(std::string& str) const +{ + std::ostringstream oss; + oss << "enq_rec: m=" << _enq_hdr._rhdr._magic; + oss << " v=" << (int)_enq_hdr._rhdr._version; + oss << " rid=" << _enq_hdr._rhdr._rid; + if (_xidp) + oss << " xid=\"" << _xidp << "\""; + oss << " len=" << _enq_hdr._dsize; + str.append(oss.str()); + return str; +} + +std::size_t +enq_rec::rec_size() const +{ + return rec_size(_enq_hdr._xidsize, _enq_hdr._dsize, ::is_enq_external(&_enq_hdr)); +} + +std::size_t +enq_rec::rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external) +{ + if (external) + return sizeof(enq_hdr_t) + xidsize + sizeof(rec_tail_t); + return sizeof(enq_hdr_t) + xidsize + dsize + sizeof(rec_tail_t); +} + +void +enq_rec::check_rec_tail(const std::streampos rec_start) const { + Checksum checksum; + checksum.addData((const unsigned char*)&_enq_hdr, sizeof(::enq_hdr_t)); + if (_enq_hdr._xidsize > 0) { + checksum.addData((const unsigned char*)_xid_buff, _enq_hdr._xidsize); + } + if (_enq_hdr._dsize > 0) { + checksum.addData((const unsigned char*)_data_buff, _enq_hdr._dsize); + } + uint32_t cs = checksum.getChecksum(); + uint16_t res = ::rec_tail_check(&_enq_tail, &_enq_hdr._rhdr, cs); + if (res != 0) { + std::stringstream oss; + oss << std::endl << " Record offset: 0x" << std::hex << rec_start; + if (res & ::REC_TAIL_MAGIC_ERR_MASK) { + oss << std::endl << " Magic: expected 0x" << ~_enq_hdr._rhdr._magic << "; found 0x" << _enq_tail._xmagic; + } + if (res & ::REC_TAIL_SERIAL_ERR_MASK) { + oss << std::endl << " Serial: expected 0x" << _enq_hdr._rhdr._serial << "; found 0x" << _enq_tail._serial; + } + if (res & ::REC_TAIL_RID_ERR_MASK) { + oss << std::endl << " Record Id: expected 0x" << _enq_hdr._rhdr._rid << "; found 0x" << _enq_tail._rid; + } + if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) { + oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _enq_tail._checksum; + } + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), 
"enq_rec", "check_rec_tail"); + } +} + +void +enq_rec::clean() { + if (_xid_buff) { + std::free(_xid_buff); + _xid_buff = 0; + } + if (_data_buff) { + std::free(_data_buff); + _data_buff = 0; + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h new file mode 100644 index 0000000000..d85cde42f5 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/enq_rec.h @@ -0,0 +1,74 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_ENQ_REC_H +#define QPID_LINEARSTORE_JOURNAL_ENQ_REC_H + +#include "qpid/linearstore/journal/jrec.h" +#include "qpid/linearstore/journal/utils/enq_hdr.h" +#include "qpid/linearstore/journal/utils/rec_tail.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +/** +* \class enq_rec +* \brief Class to handle a single journal enqueue record. 
+*/
+class enq_rec : public jrec
+{
+private:
+    ::enq_hdr_t _enq_hdr;   ///< Local instance of enqueue header struct
+    const void* _xidp;      ///< xid pointer for encoding (for writing to disk)
+    const void* _data;      ///< Pointer to data to be written to disk
+    void* _xid_buff;        ///< xid buffer allocated by decode(), released by clean()
+    void* _data_buff;       ///< Data buffer allocated by decode(), released by clean()
+    ::rec_tail_t _enq_tail; ///< Local instance of enqueue tail struct
+
+public:
+    enq_rec();
+    virtual ~enq_rec();
+
+    // Set up the record for encoding; only the dbuf/xidp pointers are stored, not the bytes.
+    void reset(const uint64_t serial, const uint64_t rid, const void* const dbuf, const std::size_t dlen,
+            const void* const xidp, const std::size_t xidlen, const bool transient, const bool external);
+    // Write (part of) the record to wptr, starting rec_offs_dblks into the record and using at
+    // most max_size_dblks of space.
+    uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum);
+    // Read (part of) the record from ifsp; rec_offs tracks progress across calls.
+    bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start);
+
+    std::size_t get_xid(void** const xidpp);   // pointer to / size of the decoded xid
+    std::size_t get_data(void** const datapp); // pointer to / size of the decoded data
+    inline bool is_transient() const { return ::is_enq_transient(&_enq_hdr); }
+    inline bool is_external() const { return ::is_enq_external(&_enq_hdr); }
+    std::string& str(std::string& str) const;  // append a one-line summary to str
+    inline std::size_t data_size() const { return _enq_hdr._dsize; }
+    inline std::size_t xid_size() const { return _enq_hdr._xidsize; }
+    std::size_t rec_size() const;              // size of this record in bytes
+    static std::size_t rec_size(const std::size_t xidsize, const std::size_t dsize, const bool external);
+    inline uint64_t rid() const { return _enq_hdr._rhdr._rid; }
+    void check_rec_tail(const std::streampos rec_start) const; // throws jexception on a bad tail
+
+private:
+    virtual void clean(); // frees _xid_buff and _data_buff
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENQ_REC_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/enums.h b/qpid/cpp/src/qpid/linearstore/journal/enums.h
new file mode 100644
index 0000000000..90ec355955
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/enums.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_ENUMS_H +#define QPID_LINEARSTORE_JOURNAL_ENUMS_H + +namespace qpid { +namespace linearstore { +namespace journal { + +// TODO: Change this to flags, as multiple of these conditions may exist simultaneously +/** +* \brief Enumeration of possible return states from journal read and write operations. +*/ +enum _iores +{ + RHM_IORES_SUCCESS = 0, ///< Success: IO operation completed noramlly. + RHM_IORES_PAGE_AIOWAIT, ///< IO operation suspended - next page is waiting for AIO. + RHM_IORES_FILE_AIOWAIT, ///< IO operation suspended - next file is waiting for AIO. + RHM_IORES_EMPTY, ///< During read operations, nothing further is available to read. + RHM_IORES_TXPENDING ///< Operation blocked by pending transaction. 
+}; +typedef _iores iores; + +static inline const char* iores_str(iores res) +{ + switch (res) + { + case RHM_IORES_SUCCESS: return "RHM_IORES_SUCCESS"; + case RHM_IORES_PAGE_AIOWAIT: return "RHM_IORES_PAGE_AIOWAIT"; + case RHM_IORES_FILE_AIOWAIT: return "RHM_IORES_FILE_AIOWAIT"; + case RHM_IORES_EMPTY: return "RHM_IORES_EMPTY"; + case RHM_IORES_TXPENDING: return "RHM_IORES_TXPENDING"; + } + return "<iores unknown>"; +} + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_ENUMS_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcfg.h b/qpid/cpp/src/qpid/linearstore/journal/jcfg.h new file mode 100644 index 0000000000..b33a419a9d --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jcfg.h @@ -0,0 +1,72 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include <cmath> +#include <cstdlib> + +#ifndef QPID_QLS_JRNL_JCFG_H +#define QPID_QLS_JRNL_JCFG_H + +#define QLS_SBLK_SIZE_BYTES 4096 /**< Disk softblock size in bytes, should match size used on disk media */ +#define QLS_AIO_ALIGN_BOUNDARY_BYTES QLS_SBLK_SIZE_BYTES /** Memory alignment boundary used for DMA */ +/** +* <b>Rule:</b> Data block size (QLS_DBLK_SIZE_BYTES) MUST be a power of 2 AND +* a power of 2 factor of the disk softblock size (QLS_SBLK_SIZE_BYTES): +* <pre> +* n * QLS_DBLK_SIZE_BYTES == QLS_SBLK_SIZE_BYTES (n = 1,2,4,8...) +* </pre> +*/ +#define QLS_DBLK_SIZE_BYTES 128 /**< Data block size in bytes (CANNOT BE LESS THAN 32!) */ +#define QLS_SBLK_SIZE_DBLKS (QLS_SBLK_SIZE_BYTES / QLS_DBLK_SIZE_BYTES) /**< Disk softblock size in multiples of QLS_DBLK_SIZE_BYTES */ +#define QLS_SBLK_SIZE_KIB (QLS_SBLK_SIZE_BYTES / 1024) /**< Disk softblock size in KiB */ + +#define QLS_WMGR_DEF_PAGE_SIZE_KIB 32 /**< Journal write page size in KiB (default) */ +#define QLS_WMGR_DEF_PAGE_SIZE_SBLKS (QLS_WMGR_DEF_PAGE_SIZE_KIB / QLS_SBLK_SIZE_KIB) /**< Journal write page size in softblocks (default) */ +#define QLS_WMGR_DEF_PAGES 32 /**< Number of pages to use in wmgr (default) */ + +#define QLS_WMGR_MAXDTOKPP 1024 /**< Max. dtoks (data blocks) per page in wmgr */ +#define QLS_WMGR_MAXWAITUS 100 /**< Max. 
wait time (us) before submitting AIO */ + +#define QLS_JRNL_FILE_EXTENSION ".jrnl" /**< Extension for journal data files */ +#define QLS_TXA_MAGIC 0x61534c51 /**< ("QLSa" in little endian) Magic for dtx abort hdrs */ +#define QLS_TXC_MAGIC 0x63534c51 /**< ("QLSc" in little endian) Magic for dtx commit hdrs */ +#define QLS_DEQ_MAGIC 0x64534c51 /**< ("QLSd" in little endian) Magic for deq rec hdrs */ +#define QLS_ENQ_MAGIC 0x65534c51 /**< ("QLSe" in little endian) Magic for enq rec hdrs */ +#define QLS_FILE_MAGIC 0x66534c51 /**< ("QLSf" in little endian) Magic for file hdrs */ +#define QLS_EMPTY_MAGIC 0x78534c51 /**< ("QLSx" in little endian) Magic for empty dblk */ +#define QLS_JRNL_VERSION 2 /**< Version (of file layout) */ +#define QLS_JRNL_FHDR_RES_SIZE_SBLKS 1 /**< Journal file header reserved size in sblks (as defined by QLS_SBLK_SIZE_BYTES) */ +#define QLS_MAX_QUEUE_NAME_LEN (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES) - sizeof(file_hdr_t) + +#define QLS_CLEAN /**< If defined, writes QLS_CLEAN_CHAR to all filled areas on disk */ +#define QLS_CLEAN_CHAR 0xff /**< Char used to clear empty space on disk */ + +namespace qpid { +namespace linearstore { + + const int QLS_RAND_WIDTH = (int)(::log((RAND_MAX + 1ULL))/::log(2)); + const int QLS_RAND_SHIFT1 = 64 - QLS_RAND_WIDTH; + const int QLS_RAND_SHIFT2 = QLS_RAND_SHIFT1 - QLS_RAND_WIDTH; + const int QLS_RAND_MASK = (int)::pow(2, QLS_RAND_SHIFT2) - 1; + +}} + +#endif /* ifndef QPID_QLS_JRNL_JCFG_H */ diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp b/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp new file mode 100644 index 0000000000..cc31f2e1df --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jcntl.cpp @@ -0,0 +1,440 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/jcntl.h" + +#include <iomanip> +#include "qpid/linearstore/journal/data_tok.h" +#include "qpid/linearstore/journal/JournalLog.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +#define AIO_CMPL_TIMEOUT_SEC 5 +#define AIO_CMPL_TIMEOUT_NSEC 0 +#define FINAL_AIO_CMPL_TIMEOUT_SEC 15 +#define FINAL_AIO_CMPL_TIMEOUT_NSEC 0 + +// Static +timespec jcntl::_aio_cmpl_timeout; ///< Timeout for blocking libaio returns +timespec jcntl::_final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing +bool jcntl::_init = init_statics(); +bool jcntl::init_statics() +{ + _aio_cmpl_timeout.tv_sec = AIO_CMPL_TIMEOUT_SEC; + _aio_cmpl_timeout.tv_nsec = AIO_CMPL_TIMEOUT_NSEC; + _final_aio_cmpl_timeout.tv_sec = FINAL_AIO_CMPL_TIMEOUT_SEC; + _final_aio_cmpl_timeout.tv_nsec = FINAL_AIO_CMPL_TIMEOUT_NSEC; + return true; +} + + +// Functions + +jcntl::jcntl(const std::string& jid, + const std::string& jdir, + JournalLog& jrnl_log): + _jid(jid), + _jdir(jdir), + _init_flag(false), + _stop_flag(false), + _readonly_flag(false), + _jrnl_log(jrnl_log), + _linearFileController(*this), + _emptyFilePoolPtr(0), + _emap(), + _tmap(), + _wmgr(this, _emap, _tmap, _linearFileController), + _recoveryManager(_jdir.dirname(), _jid, _emap, _tmap, jrnl_log) +{} + +jcntl::~jcntl() +{ + if (_init_flag && !_stop_flag) + try { stop(true); 
} + catch (const jexception& e) { std::cerr << e << std::endl; } + _linearFileController.finalize(); +} + +void +jcntl::initialize(EmptyFilePool* efpp, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + aio_callback* const cbp) +{ + _init_flag = false; + _stop_flag = false; + _readonly_flag = false; + + _emap.clear(); + _tmap.clear(); + + _linearFileController.finalize(); + _jdir.clear_dir(); // Clear any existing journal files + _linearFileController.initialize(_jdir.dirname(), efpp, 0ULL); + _linearFileController.getNextJournalFile(); + _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, QLS_WMGR_MAXDTOKPP, QLS_WMGR_MAXWAITUS, 0); + _init_flag = true; +} + +void +jcntl::recover(EmptyFilePoolManager* efpmp, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + aio_callback* const cbp, + const std::vector<std::string>* prep_txn_list_ptr, + uint64_t& highest_rid) +{ + _init_flag = false; + _stop_flag = false; + _readonly_flag = false; + + _emap.clear(); + _tmap.clear(); + + _linearFileController.finalize(); + + // Verify journal dir and journal files + _jdir.verify_dir(); + _recoveryManager.analyzeJournals(prep_txn_list_ptr, efpmp, &_emptyFilePoolPtr); + assert(_emptyFilePoolPtr != 0); + + highest_rid = _recoveryManager.getHighestRecordId(); + _jrnl_log.log(/*LOG_DEBUG*/JournalLog::LOG_INFO, _jid, _recoveryManager.toString(_jid, 5U)); + _linearFileController.initialize(_jdir.dirname(), _emptyFilePoolPtr, _recoveryManager.getHighestFileNumber()); + _recoveryManager.setLinearFileControllerJournals(&qpid::linearstore::journal::LinearFileController::addJournalFile, &_linearFileController); + if (_recoveryManager.isLastFileFull()) { + _linearFileController.getNextJournalFile(); + } + _wmgr.initialize(cbp, wcache_pgsize_sblks, wcache_num_pages, QLS_WMGR_MAXDTOKPP, QLS_WMGR_MAXWAITUS, + (_recoveryManager.isLastFileFull() ? 
0 : _recoveryManager.getEndOffset())); + + _readonly_flag = true; + _init_flag = true; +} + +void +jcntl::recover_complete() +{ + if (!_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_NOTRECOVERED, "jcntl", "recover_complete"); + _recoveryManager.recoveryComplete(); + _readonly_flag = false; +} + +void +jcntl::delete_jrnl_files() +{ + stop(true); // wait for AIO to complete + _linearFileController.purgeEmptyFilesToEfp(); + _jdir.delete_dir(); +} + + +iores +jcntl::enqueue_data_record(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const bool transient) +{ + iores r; + check_wstatus("enqueue_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, 0, 0, false, transient, false), r, + dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_extern_data_record(const std::size_t tot_data_len, + data_tok* dtokp, + const bool transient) +{ + iores r; + check_wstatus("enqueue_extern_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(0, tot_data_len, 0, dtokp, 0, 0, false, transient, true), r, dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_txn_data_record(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient) +{ + iores r; + check_wstatus("enqueue_tx_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(data_buff, tot_data_len, this_data_len, dtokp, xid.data(), xid.size(), + tpc_flag, transient, false), r, dtokp)) ; + } + return r; +} + +iores +jcntl::enqueue_extern_txn_data_record(const std::size_t tot_data_len, + data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient) +{ + iores r; + check_wstatus("enqueue_extern_txn_data_record"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.enqueue(0, 
tot_data_len, 0, dtokp, xid.data(), xid.size(), tpc_flag, transient, + true), r, dtokp)) ; + } + return r; +} + +iores +jcntl::read_data_record(void** const datapp, + std::size_t& dsize, + void** const xidpp, + std::size_t& xidsize, + bool& transient, + bool& external, + data_tok* const dtokp, + bool ignore_pending_txns) +{ + check_rstatus("read_data"); + if (_recoveryManager.readNextRemainingRecord(datapp, dsize, xidpp, xidsize, transient, external, dtokp, ignore_pending_txns)) { + return RHM_IORES_SUCCESS; + } + return RHM_IORES_EMPTY; +} + +iores +jcntl::dequeue_data_record(data_tok* const dtokp, + const bool txn_coml_commit) +{ + iores r; + check_wstatus("dequeue_data"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.dequeue(dtokp, 0, 0, false, txn_coml_commit), r, dtokp)) ; + } + return r; +} + +iores +jcntl::dequeue_txn_data_record(data_tok* const dtokp, + const std::string& xid, + const bool tpc_flag, + const bool txn_coml_commit) +{ + iores r; + check_wstatus("dequeue_data"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.dequeue(dtokp, xid.data(), xid.size(), tpc_flag, txn_coml_commit), r, dtokp)) ; + } + return r; +} + +iores +jcntl::txn_abort(data_tok* const dtokp, + const std::string& xid) +{ + iores r; + check_wstatus("txn_abort"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.abort(dtokp, xid.data(), xid.size()), r, dtokp)) ; + } + return r; +} + +iores +jcntl::txn_commit(data_tok* const dtokp, + const std::string& xid) +{ + iores r; + check_wstatus("txn_commit"); + { + slock s(_wr_mutex); + while (handle_aio_wait(_wmgr.commit(dtokp, xid.data(), xid.size()), r, dtokp)) ; + } + return r; +} + +bool +jcntl::is_txn_synced(const std::string& xid) +{ + slock s(_wr_mutex); + bool res = _wmgr.is_txn_synced(xid); + return res; +} + +int32_t +jcntl::get_wr_events(timespec* const timeout) +{ + stlock t(_wr_mutex); + if (!t.locked()) + return jerrno::LOCK_TAKEN; + return _wmgr.get_events(timeout, false); +} + +void 
+jcntl::stop(const bool block_till_aio_cmpl) +{ + if (_readonly_flag) + check_rstatus("stop"); + else + check_wstatus("stop"); + _stop_flag = true; + if (!_readonly_flag) + flush(block_till_aio_cmpl); +} + +LinearFileController& +jcntl::getLinearFileControllerRef() { + return _linearFileController; +} + +// static +std::string +jcntl::str2hexnum(const std::string& str) { + if (str.empty()) { + return "<null>"; + } + std::ostringstream oss; + oss << "(" << str.size() << ")0x" << std::hex; + for (unsigned i=str.size(); i>0; --i) { + oss << std::setfill('0') << std::setw(2) << (uint16_t)(uint8_t)str[i-1]; + } + return oss.str(); +} + +iores +jcntl::flush(const bool block_till_aio_cmpl) +{ + if (!_init_flag) + return RHM_IORES_SUCCESS; + if (_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", "flush"); + iores res; + { + slock s(_wr_mutex); + res = _wmgr.flush(); + } + if (block_till_aio_cmpl) + aio_cmpl_wait(); + return res; +} + +// Protected/Private functions + +void +jcntl::check_wstatus(const char* fn_name) const +{ + if (!_init_flag) + throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name); + if (_readonly_flag) + throw jexception(jerrno::JERR_JCNTL_READONLY, "jcntl", fn_name); + if (_stop_flag) + throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", fn_name); +} + +void +jcntl::check_rstatus(const char* fn_name) const +{ + if (!_init_flag) + throw jexception(jerrno::JERR__NINIT, "jcntl", fn_name); + if (_stop_flag) + throw jexception(jerrno::JERR_JCNTL_STOPPED, "jcntl", fn_name); +} + + +void +jcntl::aio_cmpl_wait() +{ + //while (_wmgr.get_aio_evt_rem()) + while (true) + { + uint32_t aer; + { + slock s(_wr_mutex); + aer = _wmgr.get_aio_evt_rem(); + } + if (aer == 0) break; // no events left + if (get_wr_events(&_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "aio_cmpl_wait"); + } +} + + +bool +jcntl::handle_aio_wait(const iores res, iores& resout, const data_tok* dtp) +{ + resout = res; 
+ if (res == RHM_IORES_PAGE_AIOWAIT) + { + while (_wmgr.curr_pg_blocked()) + { + if (_wmgr.get_aio_evt_rem() == 0) { +//std::cout << "&&&&&& jcntl::handle_aio_wait() " << _wmgr.status_str() << std::endl; // DEBUG + throw jexception("_wmgr.curr_pg_blocked() with no events remaining"); // TODO - complete exception + } + if (_wmgr.get_events(&_aio_cmpl_timeout, false) == jerrno::AIO_TIMEOUT) + { + std::ostringstream oss; + oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str(); + _jrnl_log.log(JournalLog::LOG_CRITICAL, _jid, oss.str()); + throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait"); + } + } + return true; + } + else if (res == RHM_IORES_FILE_AIOWAIT) + { +// while (_wmgr.curr_file_blocked()) +// { +// if (_wmgr.get_events(pmgr::UNUSED, &_aio_cmpl_timeout) == jerrno::AIO_TIMEOUT) +// { +// std::ostringstream oss; +// oss << "get_events() returned JERR_JCNTL_AIOCMPLWAIT; wmgr_status: " << _wmgr.status_str(); +// this->log(LOG_CRITICAL, oss.str()); +// throw jexception(jerrno::JERR_JCNTL_AIOCMPLWAIT, "jcntl", "handle_aio_wait"); +// } +// } +// _wrfc.wr_reset(); + resout = RHM_IORES_SUCCESS; + data_tok::write_state ws = dtp->wstate(); + return ws == data_tok::ENQ_PART || ws == data_tok::DEQ_PART || ws == data_tok::ABORT_PART || + ws == data_tok::COMMIT_PART; + } + return false; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/jcntl.h b/qpid/cpp/src/qpid/linearstore/journal/jcntl.h new file mode 100644 index 0000000000..94c00d2fab --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jcntl.h @@ -0,0 +1,570 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_JCNTL_H +#define QPID_LINEARSTORE_JOURNAL_JCNTL_H + +#include <qpid/linearstore/journal/LinearFileController.h> +#include "qpid/linearstore/journal/jdir.h" +#include "qpid/linearstore/journal/RecoveryManager.h" +#include "qpid/linearstore/journal/wmgr.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +class EmptyFilePool; +class EmptyFilePoolManager; +class JournalLog; + +/** +* \brief Access and control interface for the journal. This is the top-level class for the +* journal. +* +* This is the top-level journal class; one instance of this class controls one instance of the +* journal and all its files and associated control structures. Besides this class, the only +* other class that needs to be used at a higher level is the data_tok class, one instance of +* which is used per data block written to the journal, and is used to track its status through +* the AIO enqueue, read and dequeue process. +*/ +class jcntl +{ +protected: + /** + * \brief Journal ID + * + * This string uniquely identifies this journal instance. It will most likely be associated + * with the identity of the message queue with which it is associated. + */ + // TODO: This is not included in any files at present, add to file_hdr? + std::string _jid; + + /** + * \brief Journal directory + * + * This string stores the path to the journal directory. 
It may be absolute or relative, and + * should not end in a file separator character. (e.g. "/fastdisk/jdata" is correct, + * "/fastdisk/jdata/" is not.) + */ + jdir _jdir; + + /** + * \brief Initialized flag + * + * This flag starts out set to false, is set to true once this object has been initialized, + * either by calling initialize() or recover(). + */ + bool _init_flag; + + /** + * \brief Stopped flag + * + * This flag starts out false, and is set to true when stop() is called. At this point, the + * journal will no longer accept messages until either initialize() or recover() is called. + * There is no way other than through initialization to reset this flag. + */ + // TODO: It would be helpful to distinguish between states stopping and stopped. If stop(true) is called, + // then we are stopping, but must wait for all outstanding aios to return before being finally stopped. During + // this period, however, no new enqueue/dequeue/read requests may be accepted. + bool _stop_flag; + + /** + * \brief Read-only state flag used during recover. + * + * When true, this flag prevents journal write operations (enqueue and dequeue), but + * allows read to occur. It is used during recovery, and is reset when recovered() is + * called. 
+ */ + bool _readonly_flag; + + // Journal control structures + JournalLog& _jrnl_log; ///< Ref to Journal Log instance + LinearFileController _linearFileController; ///< Linear File Controller + EmptyFilePool* _emptyFilePoolPtr; ///< Pointer to Empty File Pool for this queue + enq_map _emap; ///< Enqueue map for low water mark management + txn_map _tmap; ///< Transaction map open transactions + wmgr _wmgr; ///< Write page manager which manages AIO + RecoveryManager _recoveryManager; ///< Recovery data used for recovery + smutex _wr_mutex; ///< Mutex for journal writes + +public: + static timespec _aio_cmpl_timeout; ///< Timeout for blocking libaio returns + static timespec _final_aio_cmpl_timeout; ///< Timeout for blocking libaio returns when stopping or finalizing + + /** + * \brief Journal constructor. + * + * Constructor which sets the physical file location and base name. + * + * \param jid A unique identifier for this journal instance. + * \param jdir The directory which will contain the journal files. + * \param base_filename The string which will be used to start all journal filenames. + */ + jcntl(const std::string& jid, + const std::string& jdir, + JournalLog& jrnl_log); + + /** + * \brief Destructor. + */ + virtual ~jcntl(); + + inline const std::string& id() const { return _jid; } + + inline const std::string& jrnl_dir() const { return _jdir.dirname(); } + + /** + * \brief Initialize the journal for storing data. + * + * Initialize the journal by creating new journal data files and initializing internal + * control structures. When complete, the journal will be empty, and ready to store data. + * + * <b>NOTE: Any existing journal will be ignored by this operation.</b> To use recover + * the data from an existing journal, use recover(). 
+ * + * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created + * and deleted.</b> + * + * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be + * used.</b> + * + * \param num_jfiles The number of journal files to be created. + * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically + * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then + * no files are added and an exception will be thrown if the journal runs out of file space. + * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the + * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If + * this number of files exist and the journal runs out of space, an exception will be thrown. This number + * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a + * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled. + * \param jfsize_sblks The size of each journal file expressed in softblocks. + * \param wcache_num_pages The number of write cache pages to create. + * \param wcache_pgsize_sblks The size in sblks of each write cache page. + * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL). + * + * \exception TODO + */ + void initialize(EmptyFilePool* efpp, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + aio_callback* const cbp); + + /** + * /brief Initialize journal by recovering state from previously written journal. + * + * Initialize journal by recovering state from previously written journal. The journal files + * are analyzed, and all records that have not been dequeued and that remain in the journal + * will be available for reading. 
The journal is placed in a read-only state until + * recovered() is called; any calls to enqueue or dequeue will fail with an exception + * in this state. + * + * <b>NOTE: If <i>NULL</i> is passed to the deque pointers, they will be internally created + * and deleted.</b> + * + * <b>NOTE: If <i>NULL</i> is passed to the callbacks, internal default callbacks will be + * used.</b> + * + * \param num_jfiles The number of journal files to be created. + * \param auto_expand If true, allows journal file auto-expansion. In this mode, the journal will automatically + * add files to the journal if it runs out of space. No more than ae_max_jfiles may be added. If false, then + * no files are added and an exception will be thrown if the journal runs out of file space. + * \param ae_max_jfiles Upper limit of journal files for auto-expand mode. When auto_expand is true, this is the + * maximum total number of files allowed in the journal (original plus those added by auto-expand mode). If + * this number of files exist and the journal runs out of space, an exception will be thrown. This number + * must be greater than the num_jfiles parameter value but cannot exceed the maximum number of files for a + * single journal; if num_jfiles is already at its maximum value, then auto-expand will be disabled. + * \param jfsize_sblks The size of each journal file expressed in softblocks. + * \param wcache_num_pages The number of write cache pages to create. + * \param wcache_pgsize_sblks The size in sblks of each write cache page. + * \param cbp Pointer to object containing callback functions for read and write operations. May be 0 (NULL). 
+ * \param prep_txn_list_ptr + * \param highest_rid Returns the highest rid found in the journal during recover + * + * \exception TODO + */ + void recover(EmptyFilePoolManager* efpm, + const uint16_t wcache_num_pages, + const uint32_t wcache_pgsize_sblks, + aio_callback* const cbp, + const std::vector<std::string>* prep_txn_list_ptr, + uint64_t& highest_rid); + + /** + * \brief Notification to the journal that recovery is complete and that normal operation + * may resume. + * + * This call notifies the journal that recovery is complete and that normal operation + * may resume. The read pointers are reset so that all records read as a part of recover + * may be re-read during normal operation. The read-only flag is then reset, allowing + * enqueue and dequeue operations to resume. + * + * \exception TODO + */ + void recover_complete(); + + /** + * \brief Stops journal and deletes all journal files. + * + * Clear the journal directory of all journal files matching the base filename. + * + * \exception TODO + */ + void delete_jrnl_files(); + + /** + * \brief Enqueue data. + * + * Enqueue data or part thereof. If a large data block is being written, then it may be + * enqueued in parts by setting this_data_len to the size of the data being written in this + * call. The total data size must be known in advance, however, as this is written into the + * record header on the first record write. The state of the write (i.e. how much has been + * written so far) is maintained in the data token dtokp. Partial writes will return in state + * ENQ_PART. + * + * Note that a return value of anything other than RHM_IORES_SUCCESS implies that this write + * operation did not complete successfully or was partially completed. The action taken under + * these conditions depends on the value of the return. 
For example, RHM_IORES_AIO_WAIT + * implies that all pages in the write page cache are waiting for AIO operations to return, + * and that the call should be remade after waiting a bit. + * + * Example: If a write of 99 kB is divided into three equal parts, then the following states + * and returns would characterize a successful operation: + * <pre> + * dtok. dtok. dtok. + * Pperation Return wstate() dsize() written() Comment + * -----------------+--------+--------+-------+---------+------------------------------------ + * NONE 0 0 Value of dtok before op + * edr(99000, 33000) SUCCESS ENQ_PART 99000 33000 Enqueue part 1 + * edr(99000, 33000) AIO_WAIT ENQ_PART 99000 50000 Enqueue part 2, not completed + * edr(99000, 33000) SUCCESS ENQ_PART 99000 66000 Enqueue part 2 again + * edr(99000, 33000) SUCCESS ENQ 99000 99000 Enqueue part 3 + * </pre> + * + * \param data_buff Pointer to data to be enqueued for this enqueue operation. + * \param tot_data_len Total data length. + * \param this_data_len Amount to be written in this enqueue operation. + * \param dtokp Pointer to data token which contains the details of the enqueue operation. + * \param transient Flag indicating transient persistence (ie, ignored on recover). + * + * \exception TODO + */ + iores enqueue_data_record(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const bool transient); + + iores enqueue_extern_data_record(const std::size_t tot_data_len, + data_tok* dtokp, + const bool transient); + + /** + * \brief Enqueue data. + * + * \param data_buff Pointer to data to be enqueued for this enqueue operation. + * \param tot_data_len Total data length. + * \param this_data_len Amount to be written in this enqueue operation. + * \param dtokp Pointer to data token which contains the details of the enqueue operation. + * \param xid String containing xid. An empty string (i.e. length=0) will be considered + * non-transactional. 
+ * \param transient Flag indicating transient persistence (ie, ignored on recover). + * + * \exception TODO + */ + iores enqueue_txn_data_record(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient); + + iores enqueue_extern_txn_data_record(const std::size_t tot_data_len, + data_tok* dtokp, + const std::string& xid, + const bool tpc_flag, + const bool transient); + + /** + * \brief Reads data from the journal. It is the responsibility of the reader to free + * the memory that is allocated through this call - see below for details. + * + * Reads the next non-dequeued data record from the journal. + * + * <b>Note</b> that this call allocates memory into which the data and XID are copied. It + * is the responsibility of the caller to free this memory. The memory for the data and + * XID are allocated in a single call, and the XID precedes the data in the memory space. + * Thus, where an XID exists, freeing the XID pointer will free both the XID and data memory. + * However, if an XID does not exist for the message, the XID pointer xidpp is set to NULL, + * and it is the data pointer datapp that must be freed. Should neither an XID nor data be + * present (ie an empty record), then no memory is allocated, and both pointers will be NULL. + * In this case, there is no need to free memory. + * + * TODO: Fix this lousy interface. The caller should NOT be required to clean up these + * pointers! Rather use a struct, or better still, let the data token carry the data and + * xid pointers and lengths, and have the data token both allocate and delete. + * + * \param datapp Pointer to pointer that will be set to point to memory allocated and + * containing the data. Will be set to NULL if the call fails or there is no data + * in the record. + * \param dsize Ref that will be set to the size of the data. 
Will be set to 0 if the call + * fails or if there is no data in the record. + * \param xidpp Pointer to pointer that will be set to point to memory allocated and + * containing the XID. Will be set to NULL if the call fails or there is no XID attached + * to this record. + * \param xidsize Ref that will be set to the size of the XID. + * \param transient Ref that will be set true if record is transient. + * \param external Ref that will be set true if record is external. In this case, the data + * pointer datapp will be set to NULL, but dsize will contain the size of the data. + * NOTE: If there is an xid, then xidpp must be freed. + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param ignore_pending_txns When false (default), if the next record to be read is locked + * by a pending transaction, the read fails with RHM_IORES_TXPENDING. However, if set + * to true, then locks are ignored. This is required for reading of the Transaction + * Prepared List (TPL) which may have its entries locked, but may be read from + * time-to-time, and needs all its records (locked and unlocked) to be available. + * + * \exception TODO + */ + iores read_data_record(void** const datapp, + std::size_t& dsize, + void** const xidpp, + std::size_t& xidsize, + bool& transient, + bool& external, + data_tok* const dtokp, + bool ignore_pending_txns); + + /** + * \brief Dequeues (marks as no longer needed) data record in journal. + * + * Dequeues (marks as no longer needed) data record in journal. Note that it is possible + * to use the same data token instance used to enqueue this data; it contains the record ID + * needed to correctly mark this data as dequeued in the journal. Otherwise the RID of the + * record to be dequeued and the write state of ENQ must be manually set in a new or reset + * instance of data_tok. 
+ * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing + * prepared XID list items, sets whether the complete() was called in commit or abort + * mode. + * + * \exception TODO + */ + iores dequeue_data_record(data_tok* const dtokp, + const bool txn_coml_commit); + + /** + * \brief Dequeues (marks as no longer needed) data record in journal. + * + * Dequeues (marks as no longer needed) data record in journal as part of a transaction. + * Note that it is possible to use the same data token instance used to enqueue this data; + * it contains the RID needed to correctly mark this data as dequeued in the journal. + * Otherwise the RID of the record to be dequeued and the write state of ENQ must be + * manually set in a new or reset instance of data_tok. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. An empty string (i.e. length=0) will be considered + * non-transactional. + * \param txn_coml_commit Only used for preparedXID journal. When used for dequeueing + * prepared XID list items, sets whether the complete() was called in commit or abort + * mode. + * + * \exception TODO + */ + iores dequeue_txn_data_record(data_tok* const dtokp, + const std::string& xid, + const bool tpc_flag, + const bool txn_coml_commit); + + /** + * \brief Abort the transaction for all records enqueued or dequeued with the matching xid. + * + * Abort the transaction for all records enqueued with the matching xid. All enqueued records + * are effectively deleted from the journal, and can not be read. All dequeued records remain + * as though they had never been dequeued. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. 
+ * + * \exception TODO + */ + iores txn_abort(data_tok* const dtokp, + const std::string& xid); + + /** + * \brief Commit the transaction for all records enqueued or dequeued with the matching xid. + * + * Commit the transaction for all records enqueued with the matching xid. All enqueued + * records are effectively released for reading and dequeueing. All dequeued records are + * removed and can no longer be accessed. + * + * \param dtokp Pointer to data_tok instance for this data, used to track state of data + * through journal. + * \param xid String containing xid. + * + * \exception TODO + */ + iores txn_commit(data_tok* const dtokp, + const std::string& xid); + + /** + * \brief Check whether all the enqueue records for the given xid have reached disk. + * + * \param xid String containing xid. + * + * \exception TODO + */ + bool is_txn_synced(const std::string& xid); + + /** + * \brief Forces a check for returned AIO write events. + * + * Forces a check for returned AIO write events. This is normally performed by enqueue() and + * dequeue() operations, but if these operations cease, then this call needs to be made to + * force the processing of any outstanding AIO operations. + */ + int32_t get_wr_events(timespec* const timeout); + + /** + * \brief Stop the journal from accepting any further requests to read or write data. + * + * This operation is used to stop the journal. This is the normal mechanism for bringing the + * journal to an orderly stop. Any outstanding AIO operations or partially written pages in + * the write page cache will by flushed and will complete. + * + * <b>Note:</b> The journal cannot be restarted without either initializing it or restoring + * it. + * + * \param block_till_aio_cmpl If true, will block the thread while waiting for all + * outstanding AIO operations to complete. + */ + void stop(const bool block_till_aio_cmpl); + + /** + * \brief Force a flush of the write page cache, creating a single AIO write operation. 
+ */ + iores flush(const bool block_till_aio_cmpl); + + inline uint32_t get_enq_cnt() const { return _emap.size(); } // TODO: _emap: Thread safe? + + inline uint32_t get_wr_aio_evt_rem() const { slock l(_wr_mutex); return _wmgr.get_aio_evt_rem(); } + + uint32_t get_wr_outstanding_aio_dblks() const; + + uint32_t get_rd_outstanding_aio_dblks() const; + + LinearFileController& getLinearFileControllerRef(); + + /** + * \brief Check if a particular rid is enqueued. Note that this function will return + * false if the rid is transactionally enqueued and is not committed, or if it is + * locked (i.e. transactionally dequeued, but the dequeue has not been committed). + */ + inline bool is_enqueued(const uint64_t rid, bool ignore_lock) { return _emap.is_enqueued(rid, ignore_lock); } + + inline bool is_locked(const uint64_t rid) { + if (_emap.is_enqueued(rid, true) < enq_map::EMAP_OK) + return false; + return _emap.is_locked(rid) == enq_map::EMAP_TRUE; + } + + inline void enq_rid_list(std::vector<uint64_t>& rids) { _emap.rid_list(rids); } + + inline void enq_xid_list(std::vector<std::string>& xids) { _tmap.xid_list(xids); } + + inline uint32_t get_open_txn_cnt() const { return _tmap.size(); } + + // TODO Make this a const, but txn_map must support const first. + inline txn_map& get_txn_map() { return _tmap; } + + /** + * \brief Check if the journal is stopped. + * + * \return <b><i>true</i></b> if the jouranl is stopped; + * <b><i>false</i></b> otherwise. + */ + inline bool is_stopped() { return _stop_flag; } + + /** + * \brief Check if the journal is ready to read and write data. + * + * Checks if the journal is ready to read and write data. This function will return + * <b><i>true</i></b> if the journal has been either initialized or restored, and the stop() + * function has not been called since the initialization. + * + * Note that the journal may also be stopped if an internal error occurs (such as running out + * of data journal file space). 
+ * + * \return <b><i>true</i></b> if the journal is ready to read and write data; + * <b><i>false</i></b> otherwise. + */ + inline bool is_ready() const { return _init_flag && !_stop_flag; } + + inline bool is_read_only() const { return _readonly_flag; } + + /** + * \brief Get the journal directory. + * + * This returns the journal directory as set during initialization. This is the directory + * into which the journal files will be written. + */ + inline const std::string& dirname() const { return _jdir.dirname(); } + + // Management instrumentation callbacks + inline virtual void instr_incr_outstanding_aio_cnt() {} + inline virtual void instr_decr_outstanding_aio_cnt() {} + + static std::string str2hexnum(const std::string& str); + +protected: + static bool _init; + static bool init_statics(); + + /** + * \brief Check status of journal before allowing write operations. + */ + void check_wstatus(const char* fn_name) const; + + /** + * \brief Check status of journal before allowing read operations. + */ + void check_rstatus(const char* fn_name) const; + + /** + * \brief Call that blocks while waiting for all outstanding AIOs to complete + */ + void aio_cmpl_wait(); + + /** + * \brief Call that blocks until at least one message returns; used to wait for + * AIO wait conditions to clear. + */ + bool handle_aio_wait(const iores res, iores& resout, const data_tok* dtp); +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_JCNTL_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp b/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp new file mode 100644 index 0000000000..72b94d0098 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jdir.cpp @@ -0,0 +1,457 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/jdir.h" + +#include <cstring> +#include <cerrno> +#include <iomanip> +#include "qpid/linearstore/journal/jexception.h" +#include <sys/stat.h> +#include <unistd.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +jdir::jdir(const std::string& dirname/*, const std::string& _base_filename*/): + _dirname(dirname)/*, + _base_filename(_base_filename)*/ +{} + +jdir::~jdir() +{} + +// === create_dir === + +void +jdir::create_dir() +{ + create_dir(_dirname); +} + + +void +jdir::create_dir(const char* dirname) +{ + create_dir(std::string(dirname)); +} + + +void +jdir::create_dir(const std::string& dirname) +{ + std::size_t fdp = dirname.find_last_of('/'); + if (fdp != std::string::npos) + { + std::string parent_dir = dirname.substr(0, fdp); + if (!exists(parent_dir)) + create_dir(parent_dir); + } + if (::mkdir(dirname.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) + { + if (errno != EEXIST) // Dir exists, ignore + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_dir"); + } + } +} + + +// === clear_dir === + +void +jdir::clear_dir(const bool create_flag) +{ + clear_dir(_dirname/*, _base_filename*/, create_flag); +} + +void +jdir::clear_dir(const char* dirname/*, const char* base_filename*/, const bool create_flag) +{ + 
clear_dir(std::string(dirname)/*, std::string(base_filename)*/, create_flag); +} + + +void +jdir::clear_dir(const std::string& dirname/*, const std::string& +#ifndef RHM_JOWRITE + base_filename +#endif +*/ + , const bool create_flag) +{ + DIR* dir = open_dir(dirname, "clear_dir", true); + if (!dir && create_flag) { + create_dir(dirname); + dir = open_dir(dirname, "clear_dir", true); + } +//#ifndef RHM_JOWRITE + struct dirent* entry; + bool found = false; + std::string bak_dir; + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + if (std::strlen(entry->d_name) >= 3) // 'bak' + { + if (std::strncmp(entry->d_name, "bak", 3) == 0) + { + if (!found) + { + bak_dir = create_bak_dir(dirname/*, base_filename*/); + found = true; + } + std::ostringstream oldname; + oldname << dirname << "/" << entry->d_name; + std::ostringstream newname; + newname << bak_dir << "/" << entry->d_name; + if (::rename(oldname.str().c_str(), newname.str().c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << oldname.str() << "\" dest=\"" << + newname.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "clear_dir"); + } + } + } + } + } +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. 
+// check_err(errno, dir, dirname, "clear_dir"); +//#endif + close_dir(dir, dirname, "clear_dir"); +} + +// === push_down === + +std::string +jdir::push_down(const std::string& dirname, const std::string& target_dir/*, const std::string& bak_dir_base*/) +{ + std::string bak_dir_name = create_bak_dir(dirname/*, bak_dir_base*/); + + DIR* dir = open_dir(dirname, "push_down", false); + // Copy contents of targetDirName into bak dir + struct dirent* entry; + while ((entry = ::readdir(dir)) != 0) + { + // Search for targetDirName in storeDirName + if (std::strcmp(entry->d_name, target_dir.c_str()) == 0) + { + std::ostringstream oldname; + oldname << dirname << "/" << target_dir; + std::ostringstream newname; + newname << bak_dir_name << "/" << target_dir; + if (::rename(oldname.str().c_str(), newname.str().c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << oldname.str() << "\" dest=\"" << newname.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_FMOVE, oss.str(), "jdir", "push_down"); + } + break; + } + } + close_dir(dir, dirname, "push_down"); + return bak_dir_name; +} + +// === verify_dir === + +void +jdir::verify_dir() +{ + verify_dir(_dirname/*, _base_filename*/); +} + +void +jdir::verify_dir(const char* dirname/*, const char* base_filename*/) +{ + verify_dir(std::string(dirname)/*, std::string(base_filename)*/); +} + + +void +jdir::verify_dir(const std::string& dirname/*, const std::string& base_filename*/) +{ + if (!is_dir(dirname)) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\""; + throw jexception(jerrno::JERR_JDIR_NOTDIR, oss.str(), "jdir", "verify_dir"); + } + + // Read jinf file, then verify all journal files are present +// jinf ji(dirname + "/" + base_filename + "." 
+ QLS_JRNL_FILE_EXTENSION, true); +// for (uint16_t fnum=0; fnum < ji.num_jfiles(); fnum++) +// { +// std::ostringstream oss; +// oss << dirname << "/" << base_filename << "."; +// oss << std::setw(4) << std::setfill('0') << std::hex << fnum; +// oss << "." << QLS_JRNL_FILE_EXTENSION; +// if (!exists(oss.str())) +// throw jexception(jerrno::JERR_JDIR_NOSUCHFILE, oss.str(), "jdir", "verify_dir"); +// } +} + + +// === delete_dir === + +void +jdir::delete_dir(bool children_only) +{ + delete_dir(_dirname, children_only); +} + +void +jdir::delete_dir(const char* dirname, bool children_only) +{ + delete_dir(std::string(dirname), children_only); +} + +void +jdir::delete_dir(const std::string& dirname, bool children_only) +{ + struct dirent* entry; + struct stat s; + DIR* dir = open_dir(dirname, "delete_dir", true); // true = allow dir does not exist, return 0 + if (!dir) return; + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + std::string full_name(dirname + "/" + entry->d_name); + if (::lstat(full_name.c_str(), &s)) + { + ::closedir(dir); + std::ostringstream oss; + oss << "stat: file=\"" << full_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "delete_dir"); + } + if (S_ISREG(s.st_mode) || S_ISLNK(s.st_mode)) // This is a file or slink + { + if(::unlink(full_name.c_str())) + { + ::closedir(dir); + std::ostringstream oss; + oss << "unlink: file=\"" << entry->d_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_UNLINK, oss.str(), "jdir", "delete_dir"); + } + } + else if (S_ISDIR(s.st_mode)) // This is a dir + { + delete_dir(full_name); + } + else // all other types, throw up! 
+ { + ::closedir(dir); + std::ostringstream oss; + oss << "file=\"" << entry->d_name << "\" is not a dir, file or slink."; + oss << " (mode=0x" << std::hex << s.st_mode << std::dec << ")"; + throw jexception(jerrno::JERR_JDIR_BADFTYPE, oss.str(), "jdir", "delete_dir"); + } + } + } + +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. +// check_err(errno, dir, dirname, "delete_dir"); + // Now dir is empty, close and delete it + close_dir(dir, dirname, "delete_dir"); + + if (!children_only) + if (::rmdir(dirname.c_str())) + { + std::ostringstream oss; + oss << "dir=\"" << dirname << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_RMDIR, oss.str(), "jdir", "delete_dir"); + } +} + + +std::string +jdir::create_bak_dir(const std::string& dirname) +{ + DIR* dir = open_dir(dirname, "create_bak_dir", false); + long dir_num = 0L; + struct dirent* entry; + while ((entry = ::readdir(dir)) != 0) + { + // Ignore . and .. + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) + { + if (std::strlen(entry->d_name) == 9) // Format: _bak.XXXX + { + if (std::strncmp(entry->d_name, "_bak.", 5) == 0) + { + long this_dir_num = std::strtol(entry->d_name + 5, 0, 16); + if (this_dir_num > dir_num) + dir_num = this_dir_num; + } + } + } + } +// FIXME: Find out why this fails with false alarms/errors from time to time... +// While commented out, there is no error capture from reading dir entries. +// check_err(errno, dir, dirname, "create_bak_dir"); + close_dir(dir, dirname, "create_bak_dir"); + + std::ostringstream dn; + dn << dirname << "/_bak." 
<< std::hex << std::setw(4) << std::setfill('0') << ++dir_num; + if (::mkdir(dn.str().c_str(), S_IRWXU | S_IRWXG | S_IROTH)) + { + std::ostringstream oss; + oss << "dir=\"" << dn.str() << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_MKDIR, oss.str(), "jdir", "create_bak_dir"); + } + return std::string(dn.str()); +} + +bool +jdir::is_dir(const char* name) +{ + struct stat s; + if (::stat(name, &s)) + { + std::ostringstream oss; + oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "is_dir"); + } + return S_ISDIR(s.st_mode); +} + +bool +jdir::is_dir(const std::string& name) +{ + return is_dir(name.c_str()); +} + +bool +jdir::exists(const char* name) +{ + struct stat s; + if (::stat(name, &s)) + { + if (errno == ENOENT) // No such dir or file + return false; + // Throw for any other condition + std::ostringstream oss; + oss << "file=\"" << name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "exists"); + } + return true; +} + +bool +jdir::exists(const std::string& name) +{ + return exists(name.c_str()); +} + +void +jdir::read_dir(const std::string& name, std::vector<std::string>& dir_list, const bool incl_dirs, const bool incl_files, const bool incl_links, const bool return_fqfn) { + struct stat s; + if (is_dir(name)) { + DIR* dir = open_dir(name, "read_dir", false); + struct dirent* entry; + while ((entry = ::readdir(dir)) != 0) { + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) { // Ignore . and .. 
+ std::string full_name(name + "/" + entry->d_name); + if (::stat(full_name.c_str(), &s)) + { + ::closedir(dir); + std::ostringstream oss; + oss << "stat: file=\"" << full_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_STAT, oss.str(), "jdir", "delete_dir"); + } + if ((S_ISREG(s.st_mode) && incl_files) || (S_ISDIR(s.st_mode) && incl_dirs) || (S_ISLNK(s.st_mode) && incl_links)) { + if (return_fqfn) { + dir_list.push_back(name + "/" + entry->d_name); + } else { + dir_list.push_back(entry->d_name); + } + } + } + } + close_dir(dir, name, "read_dir"); + } +} + +void +jdir::check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name) +{ + if (err_num) + { + std::ostringstream oss; + oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(err_num); + ::closedir(dir); // Try to close, it makes no sense to trap errors here... + throw jexception(jerrno::JERR_JDIR_READDIR, oss.str(), "jdir", fn_name); + } +} + +void +jdir::close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name) +{ + if (::closedir(dir)) + { + std::ostringstream oss; + oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_CLOSEDIR, oss.str(), "jdir", fn_name); + } +} + +DIR* +jdir::open_dir(const std::string& dir_name, const std::string& fn_name, const bool test_enoent) +{ + DIR* dir = ::opendir(dir_name.c_str()); + if (!dir) { + if (test_enoent && errno == ENOENT) { + return 0; + } + std::ostringstream oss; + oss << "dir=\"" << dir_name << "\"" << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR_JDIR_OPENDIR, oss.str(), "jdir", fn_name); + } + return dir; +} + +std::ostream& +operator<<(std::ostream& os, const jdir& jdir) +{ + os << jdir._dirname; + return os; +} + +std::ostream& +operator<<(std::ostream& os, const jdir* jdirPtr) +{ + os << jdirPtr->_dirname; + return os; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/jdir.h 
b/qpid/cpp/src/qpid/linearstore/journal/jdir.h new file mode 100644 index 0000000000..59f21ce499 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jdir.h @@ -0,0 +1,362 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_JDIR_H +#define QPID_LINEARSTORE_JOURNAL_JDIR_H + +#include <dirent.h> +#include <string> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + + /** + * \class jdir + * \brief Class to manage the %journal directory + */ + class jdir + { + private: + std::string _dirname; + //std::string _base_filename; + + public: + + /** + * \brief Sole constructor + * + * \param dirname Name of directory to be managed. + * \param base_filename Filename root used in the creation of %journal files + * and sub-directories. + */ + jdir(const std::string& dirname/*, const std::string& base_filename*/); + + virtual ~jdir(); + + + /** + * \brief Create %journal directory as set in the dirname parameter of the constructor. + * Recursive creation is supported. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + void create_dir(); + + /** + * \brief Static function to create a directory. 
Recursive creation is supported. + * + * \param dirname C-string containing name of directory. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + static void create_dir(const char* dirname); + + /** + * \brief Static function to create a directory. Recursive creation is supported. + * + * \param dirname String containing name of directory. + * + * \exception jerrno::JERR_JDIR_MKDIR The creation of dirname failed. + */ + static void create_dir(const std::string& dirname); + + + /** + * \brief Clear the %journal directory of files matching the base filename + * by moving them into a subdirectory. This fn uses the dirname and base_filename + * that were set on construction. + * + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + */ + void clear_dir(const bool create_flag = true); + + /** + * \brief Clear the directory dirname of %journal files matching base_filename + * by moving them into a subdirectory. + * + * \param dirname C-string containing name of %journal directory. + * \param base_filename C-string containing base filename of %journal files to be matched + * for moving into subdirectory. + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. 
+ */ + static void clear_dir(const char* dirname/*, const char* base_filename*/, + const bool create_flag = true); + + /** + * \brief Clear the directory dirname of %journal files matching base_filename + * by moving them into a subdirectory. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched + * for moving into subdirectory. + * \param create_flag If set, create dirname if it is non-existent, otherwise throw + * exception + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_FMOVE Moving the files from the %journal directory to the created backup + * directory failed. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + */ + static void clear_dir(const std::string& dirname/*, const std::string& base_filename*/, + const bool create_flag = true); + + + + /** + * \brief Move (push down) the directory target_dir located in directory dirname into a backup directory + * named _bak_dir_base.XXXX (note prepended underscore), where XXXX is an increasing hex serial number + * starting at 0000. + * + * \param dirname Full path to directory containing directory to be pushed down. + * \param target_dir Name of directory in dirname to be pushed down. + * \param bak_dir_base Base name for backup directory to be created in dirname, into which target_dir will be moved. + * \return Name of backup dir into which target_dir was pushed. + */ + static std::string push_down(const std::string& dirname, const std::string& target_dir/*, const std::string& bak_dir_base*/); + + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. 
+ * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + void verify_dir(); + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. + * + * \param dirname C-string containing name of %journal directory. + * \param base_filename C-string containing base filename of %journal files to be matched for moving into sub-directory. + * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + static void verify_dir(const char* dirname/*, const char* base_filename*/); + + /** + * \brief Verify that dirname is a valid %journal directory. + * + * The validation reads the .%jinf file, and using this information verifies that all the expected %journal + * (.jdat) files are present. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched for moving into sub-directory. 
+ * + * \exception jerrno::JERR_JDIR_NOTDIR dirname is not a directory + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname + * \exception jerrno::JERR__FILEIO Error reading %jinf file + * \exception jerrno::JERR_JINF_CVALIDFAIL Error validating %jinf file + * \exception jerrno::JERR_JDIR_NOSUCHFILE Expected jdat file is missing + */ + static void verify_dir(const std::string& dirname/*, const std::string& base_filename*/); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. + */ + void delete_dir(bool children_only = false ); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param dirname C-string containing name of directory to be deleted. + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. 
+ * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. + */ + static void delete_dir(const char* dirname, bool children_only = false); + + /** + * \brief Delete the %journal directory and all files and sub--directories that it may + * contain. This is equivilent of rm -rf. + * + * FIXME: links are not handled correctly. + * + * \param dirname String containing name of directory to be deleted. + * \param children_only If true, delete only children of dirname, but leave dirname itself. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_STAT Could not stat dirname. + * \exception jerrno::JERR_JDIR_UNLINK A file could not be deleted. + * \exception jerrno::JERR_JDIR_BADFTYPE A dir entry is neiter a file nor a dir. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_RMDIR A directory could not be deleted. + */ + static void delete_dir(const std::string& dirname, bool children_only = false); + + /** + * \brief Create bakup directory that is next in sequence and move all %journal files + * matching base_filename into it. + * + * In directory dirname, search for existing backup directory using pattern + * "_basename.bak.XXXX" where XXXX is a hexadecimal sequence, and create next directory + * based on highest number found. Move all %journal files which match the base_fileaname + * parameter into this new backup directory. + * + * \param dirname String containing name of %journal directory. + * \param base_filename String containing base filename of %journal files to be matched + * for moving into subdirectory. + * + * \exception jerrno::JERR_JDIR_OPENDIR The %journal directory could not be opened. + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + * \exception jerrno::JERR_JDIR_MKDIR The backup directory could not be deleted. 
+ */ + static std::string create_bak_dir(const std::string& dirname/*, + const std::string& base_filename*/); + + /** + * \brief Return the directory name as a string. + */ + inline const std::string& dirname() const { return _dirname; } + + /** + * \brief Return the %journal base filename name as a string. + */ +// inline const std::string& base_filename() const { return _base_filename; } + + /** + * \brief Test whether the named file is a directory. + * + * \param name Name of file to be tested. + * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool is_dir(const char* name); + + /** + * \brief Test whether the named file is a directory. + * + * \param name Name of file to be tested. + * \return <b><i>true</i></b> if the named file is a directory; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool is_dir(const std::string& name); + + + /** + * \brief Test whether the named entity exists on the filesystem. + * + * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. If + * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type. + * If stat() fails with any other error, an exception is thrown. + * + * \param name Name of entity to be tested. + * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool exists(const char* name); + + /** + * \brief Test whether the named entity exists on the filesystem. + * + * If stat() fails with error ENOENT, then this will return <b><i>false</i></b>. If + * stat() succeeds, then <b><i>true</i></b> is returned, irrespective of the file type. + * If stat() fails with any other error, an exception is thrown. + * + * \param name Name of entity to be tested. 
+ * \return <b><i>true</i></b> if the named entity exists; <b><i>false</i></b> + * otherwise. + * \exception jerrno::JERR_JDIR_STAT Could not stat name. + */ + static bool exists(const std::string& name); + + static void read_dir(const std::string& name, std::vector<std::string>& dir_list, const bool incl_dirs, const bool incl_files, const bool incl_links, const bool return_fqfn); + + /** + * \brief Stream operator + */ + friend std::ostream& operator<<(std::ostream& os, const jdir& jdir); + + /** + * \brief Stream operator + */ + friend std::ostream& operator<<(std::ostream& os, const jdir* jdirPtr); + + private: + /** + * \brief Check for error, if non-zero close DIR handle and throw JERR_JDIR_READDIR + * + * \exception jerrno::JERR_JDIR_READDIR Error while reading contents of dir. + */ + static void check_err(const int err_num, DIR* dir, const std::string& dir_name, const std::string& fn_name); + + /** + * \brief Close a DIR handle, throw JERR_JDIR_CLOSEDIR if error occurs during close + * + * \exception jerrno::JERR_JDIR_CLOSEDIR The directory handle could not be closed. + */ + static void close_dir(DIR* dir, const std::string& dir_name, const std::string& fn_name); + + static DIR* open_dir(const std::string& dir_name, const std::string& fn_name, const bool test_enoent); + }; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_JDIR_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp new file mode 100644 index 0000000000..ce88e7809c --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp @@ -0,0 +1,236 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/jerrno.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +std::map<uint32_t, const char*> jerrno::_err_map; +std::map<uint32_t, const char*>::iterator jerrno::_err_map_itr; +bool jerrno::_initialized = jerrno::__init(); + +// generic errors +const uint32_t jerrno::JERR__MALLOC = 0x0100; +const uint32_t jerrno::JERR__UNDERFLOW = 0x0101; +const uint32_t jerrno::JERR__NINIT = 0x0102; +const uint32_t jerrno::JERR__AIO = 0x0103; +const uint32_t jerrno::JERR__FILEIO = 0x0104; +const uint32_t jerrno::JERR__RTCLOCK = 0x0105; +const uint32_t jerrno::JERR__PTHREAD = 0x0106; +const uint32_t jerrno::JERR__TIMEOUT = 0x0107; +const uint32_t jerrno::JERR__UNEXPRESPONSE = 0x0108; +const uint32_t jerrno::JERR__RECNFOUND = 0x0109; +const uint32_t jerrno::JERR__NOTIMPL = 0x010a; +const uint32_t jerrno::JERR__NULL = 0x010b; +const uint32_t jerrno::JERR__SYMLINK = 0x010c; + +// class jcntl +const uint32_t jerrno::JERR_JCNTL_STOPPED = 0x0200; +const uint32_t jerrno::JERR_JCNTL_READONLY = 0x0201; +const uint32_t jerrno::JERR_JCNTL_AIOCMPLWAIT = 0x0202; +const uint32_t jerrno::JERR_JCNTL_UNKNOWNMAGIC = 0x0203; +const uint32_t jerrno::JERR_JCNTL_NOTRECOVERED = 0x0204; +const uint32_t jerrno::JERR_JCNTL_ENQSTATE = 0x0207; +const uint32_t jerrno::JERR_JCNTL_INVALIDENQHDR = 0x0208; + +// class jdir +const uint32_t jerrno::JERR_JDIR_NOTDIR = 
0x0300; +const uint32_t jerrno::JERR_JDIR_MKDIR = 0x0301; +const uint32_t jerrno::JERR_JDIR_OPENDIR = 0x0302; +const uint32_t jerrno::JERR_JDIR_READDIR = 0x0303; +const uint32_t jerrno::JERR_JDIR_CLOSEDIR = 0x0304; +const uint32_t jerrno::JERR_JDIR_RMDIR = 0x0305; +const uint32_t jerrno::JERR_JDIR_NOSUCHFILE = 0x0306; +const uint32_t jerrno::JERR_JDIR_FMOVE = 0x0307; +const uint32_t jerrno::JERR_JDIR_STAT = 0x0308; +const uint32_t jerrno::JERR_JDIR_UNLINK = 0x0309; +const uint32_t jerrno::JERR_JDIR_BADFTYPE = 0x030a; + +// class JournalFile +const uint32_t jerrno::JERR_JNLF_OPEN = 0x0400; +const uint32_t jerrno::JERR_JNLF_CLOSE = 0x0401; +const uint32_t jerrno::JERR_JNLF_FILEOFFSOVFL = 0x0402; +const uint32_t jerrno::JERR_JNLF_CMPLOFFSOVFL = 0x0403; + +// class LinearFileController +const uint32_t jerrno::JERR_LFCR_SEQNUMNOTFOUND = 0x0500; + +// class jrec, enq_rec, deq_rec, txn_rec +const uint32_t jerrno::JERR_JREC_BADRECHDR = 0x0700; +const uint32_t jerrno::JERR_JREC_BADRECTAIL = 0x0701; + +// class wmgr +const uint32_t jerrno::JERR_WMGR_BADPGSTATE = 0x0801; +const uint32_t jerrno::JERR_WMGR_BADDTOKSTATE = 0x0802; +const uint32_t jerrno::JERR_WMGR_ENQDISCONT = 0x0803; +const uint32_t jerrno::JERR_WMGR_DEQDISCONT = 0x0804; +const uint32_t jerrno::JERR_WMGR_DEQRIDNOTENQ = 0x0805; +const uint32_t jerrno::JERR_WMGR_BADFH = 0x0806; +const uint32_t jerrno::JERR_WMGR_NOTSBLKALIGNED = 0x0807; + +// class RecoveryManager +const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900; +const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901; +const uint32_t jerrno::JERR_RCVM_READ = 0x0902; +const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903; +const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904; +const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905; +const uint32_t jerrno::JERR_RCVM_NULLFID = 0x0907; +const uint32_t jerrno::JERR_RCVM_INVALIDEFPID = 0x0908; + +// class data_tok +const uint32_t jerrno::JERR_DTOK_ILLEGALSTATE = 0x0a00; +// const uint32_t jerrno::JERR_DTOK_RIDNOTSET = 0x0a01; + 
+// class enq_map, txn_map +const uint32_t jerrno::JERR_MAP_DUPLICATE = 0x0b00; +const uint32_t jerrno::JERR_MAP_NOTFOUND = 0x0b01; +const uint32_t jerrno::JERR_MAP_LOCKED = 0x0b02; + +// EFP errors +const uint32_t jerrno::JERR_EFP_BADPARTITIONNAME = 0x0d01; +const uint32_t jerrno::JERR_EFP_BADPARTITIONDIR = 0x0d02; +const uint32_t jerrno::JERR_EFP_BADEFPDIRNAME = 0x0d03; +const uint32_t jerrno::JERR_EFP_NOEFP = 0x0d04; +const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05; +const uint32_t jerrno::JERR_EFP_LSTAT = 0x0d06; +const uint32_t jerrno::JERR_EFP_BADFILETYPE = 0x0d07; +const uint32_t jerrno::JERR_EFP_FOPEN = 0x0d08; +const uint32_t jerrno::JERR_EFP_FWRITE = 0x0d09; +const uint32_t jerrno::JERR_EFP_MKDIR = 0x0d0a; + +// Negative returns for some functions +const int32_t jerrno::AIO_TIMEOUT = -1; +const int32_t jerrno::LOCK_TAKEN = -2; + + +// static initialization fn + +bool +jerrno::__init() +{ + // generic errors + _err_map[JERR__MALLOC] = "JERR__MALLOC: Buffer memory allocation failed."; + _err_map[JERR__UNDERFLOW] = "JERR__UNDERFLOW: Underflow error"; + _err_map[JERR__NINIT] = "JERR__NINIT: Operation on uninitialized class."; + _err_map[JERR__AIO] = "JERR__AIO: AIO error."; + _err_map[JERR__FILEIO] = "JERR__FILEIO: File read or write failure."; + _err_map[JERR__RTCLOCK] = "JERR__RTCLOCK: Reading real-time clock failed."; + _err_map[JERR__PTHREAD] = "JERR__PTHREAD: pthread failure."; + _err_map[JERR__TIMEOUT] = "JERR__TIMEOUT: Timeout waiting for event."; + _err_map[JERR__UNEXPRESPONSE] = "JERR__UNEXPRESPONSE: Unexpected response to call or event."; + _err_map[JERR__RECNFOUND] = "JERR__RECNFOUND: Record not found."; + _err_map[JERR__NOTIMPL] = "JERR__NOTIMPL: Not implemented"; + _err_map[JERR__NULL] = "JERR__NULL: Operation on null pointer"; + _err_map[JERR__SYMLINK] = "JERR__SYMLINK: Symbolic link operation failed"; + + // class jcntl + _err_map[JERR_JCNTL_STOPPED] = "JERR_JCNTL_STOPPED: Operation on stopped journal."; + _err_map[JERR_JCNTL_READONLY] = 
"JERR_JCNTL_READONLY: Write operation on read-only journal (during recovery)."; + _err_map[JERR_JCNTL_AIOCMPLWAIT] = "JERR_JCNTL_AIOCMPLWAIT: Timeout waiting for AIOs to complete."; + _err_map[JERR_JCNTL_UNKNOWNMAGIC] = "JERR_JCNTL_UNKNOWNMAGIC: Found record with unknown magic."; + _err_map[JERR_JCNTL_NOTRECOVERED] = "JERR_JCNTL_NOTRECOVERED: Operation requires recover() to be run first."; + _err_map[JERR_JCNTL_ENQSTATE] = "JERR_JCNTL_ENQSTATE: Read error: Record not in ENQ state"; + _err_map[JERR_JCNTL_INVALIDENQHDR] = "JERR_JCNTL_INVALIDENQHDR: Invalid ENQ header"; + + // class jdir + _err_map[JERR_JDIR_NOTDIR] = "JERR_JDIR_NOTDIR: Directory name exists but is not a directory."; + _err_map[JERR_JDIR_MKDIR] = "JERR_JDIR_MKDIR: Directory creation failed."; + _err_map[JERR_JDIR_OPENDIR] = "JERR_JDIR_OPENDIR: Directory open failed."; + _err_map[JERR_JDIR_READDIR] = "JERR_JDIR_READDIR: Directory read failed."; + _err_map[JERR_JDIR_CLOSEDIR] = "JERR_JDIR_CLOSEDIR: Directory close failed."; + _err_map[JERR_JDIR_RMDIR] = "JERR_JDIR_RMDIR: Directory delete failed."; + _err_map[JERR_JDIR_NOSUCHFILE] = "JERR_JDIR_NOSUCHFILE: File does not exist."; + _err_map[JERR_JDIR_FMOVE] = "JERR_JDIR_FMOVE: File move failed."; + _err_map[JERR_JDIR_STAT] = "JERR_JDIR_STAT: File stat failed."; + _err_map[JERR_JDIR_UNLINK] = "JERR_JDIR_UNLINK: File delete failed."; + _err_map[JERR_JDIR_BADFTYPE] = "JERR_JDIR_BADFTYPE: Bad or unknown file type (stat mode)."; + + // class JournalFile + _err_map[JERR_JNLF_OPEN] = "JERR_JNLF_OPEN: Unable to open file for write"; + _err_map[JERR_JNLF_CLOSE] = "JERR_JNLF_CLOSE: Unable to close file"; + _err_map[JERR_JNLF_FILEOFFSOVFL] = "JERR_JNLF_FILEOFFSOVFL: Attempted to increase submitted offset past file size."; + _err_map[JERR_JNLF_CMPLOFFSOVFL] = "JERR_JNLF_CMPLOFFSOVFL: Attempted to increase completed file offset past submitted offset."; + + // class LinearFileController + _err_map[JERR_LFCR_SEQNUMNOTFOUND] = "JERR_LFCR_SEQNUMNOTFOUND: File sequence 
number not found"; + + // class jrec, enq_rec, deq_rec, txn_rec + _err_map[JERR_JREC_BADRECHDR] = "JERR_JREC_BADRECHDR: Invalid record header."; + _err_map[JERR_JREC_BADRECTAIL] = "JERR_JREC_BADRECTAIL: Invalid record tail."; + + // class wmgr + _err_map[JERR_WMGR_BADPGSTATE] = "JERR_WMGR_BADPGSTATE: Page buffer in illegal state for operation."; + _err_map[JERR_WMGR_BADDTOKSTATE] = "JERR_WMGR_BADDTOKSTATE: Data token in illegal state for operation."; + _err_map[JERR_WMGR_ENQDISCONT] = "JERR_WMGR_ENQDISCONT: Enqueued new dtok when previous enqueue returned partly completed (state ENQ_PART)."; + _err_map[JERR_WMGR_DEQDISCONT] = "JERR_WMGR_DEQDISCONT: Dequeued new dtok when previous dequeue returned partly completed (state DEQ_PART)."; + _err_map[JERR_WMGR_DEQRIDNOTENQ] = "JERR_WMGR_DEQRIDNOTENQ: Dequeue rid is not enqueued."; + _err_map[JERR_WMGR_BADFH] = "JERR_WMGR_BADFH: Bad file handle."; + _err_map[JERR_WMGR_NOTSBLKALIGNED] = "JERR_WMGR_NOTSBLKALIGNED: Offset is not soft block (sblk)-aligned"; + + // class RecoveryManager + _err_map[JERR_RCVM_OPENRD] = "JERR_RCVM_OPENRD: Unable to open file for read"; + _err_map[JERR_RCVM_STREAMBAD] = "JERR_RCVM_STREAMBAD: Read/write stream error"; + _err_map[JERR_RCVM_READ] = "JERR_RCVM_READ: Read error: no or insufficient data to read"; + _err_map[JERR_RCVM_WRITE] = "JERR_RCVM_WRITE: Write error"; + _err_map[JERR_RCVM_NULLXID] = "JERR_RCVM_NULLXID: Null XID when XID length non-null in header"; + _err_map[JERR_RCVM_NOTDBLKALIGNED] = "JERR_RCVM_NOTDBLKALIGNED: Offset is not data block (dblk)-aligned"; + _err_map[JERR_RCVM_NULLFID] = "JERR_RCVM_NULLFID: Null file id (FID)"; + _err_map[JERR_RCVM_INVALIDEFPID] = "JERR_RCVM_INVALIDEFPID: Invalid EFP identity (partition/size)"; + + // class data_tok + _err_map[JERR_DTOK_ILLEGALSTATE] = "JERR_MTOK_ILLEGALSTATE: Attempted to change to illegal state."; + //_err_map[JERR_DTOK_RIDNOTSET] = "JERR_DTOK_RIDNOTSET: Record ID not set."; + + // class enq_map, txn_map + 
_err_map[JERR_MAP_DUPLICATE] = "JERR_MAP_DUPLICATE: Attempted to insert record into map using duplicate key."; + _err_map[JERR_MAP_NOTFOUND] = "JERR_MAP_NOTFOUND: Key not found in map."; + _err_map[JERR_MAP_LOCKED] = "JERR_MAP_LOCKED: Record ID locked by a pending transaction."; + + // EFP errors + _err_map[JERR_EFP_BADPARTITIONNAME] = "JERR_EFP_BADPARTITIONNAME: Invalid partition name (must be \'pNNN\' where NNN is a non-zero number)"; + _err_map[JERR_EFP_BADEFPDIRNAME] = "JERR_EFP_BADEFPDIRNAME: Bad Empty File Pool directory name (must be \'NNNk\', where NNN is a number which is a multiple of 4)"; + _err_map[JERR_EFP_BADPARTITIONDIR] = "JERR_EFP_BADPARTITIONDIR: Invalid partition directory"; + _err_map[JERR_EFP_NOEFP] = "JERR_EFP_NOEFP: No Empty File Pool found for given partition and empty file size"; + _err_map[JERR_EFP_EMPTY] = "JERR_EFP_EMPTY: Empty File Pool is empty"; + _err_map[JERR_EFP_LSTAT] = "JERR_EFP_LSTAT: lstat() operation failed"; + _err_map[JERR_EFP_BADFILETYPE] = "JERR_EFP_BADFILETYPE: File type incorrect for operation"; + _err_map[JERR_EFP_FOPEN] = "JERR_EFP_FOPEN: Unable to fopen file for write"; + _err_map[JERR_EFP_FWRITE] = "JERR_EFP_FWRITE: Write failed"; + _err_map[JERR_EFP_MKDIR] = "JERR_EFP_MKDIR: Directory creation failed"; + + //_err_map[] = ""; + + return true; +} + +const char* +jerrno::err_msg(const uint32_t err_no) throw () +{ + _err_map_itr = _err_map.find(err_no); + if (_err_map_itr == _err_map.end()) + return "<Unknown error code>"; + return _err_map_itr->second; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h new file mode 100644 index 0000000000..6e817682ca --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h @@ -0,0 +1,157 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
/**
 * \class jerrno
 * \brief Holder for the journal's static error codes and the static map that translates
 *        each code into a message string.
 *
 * Every member is static; the values of the constants and the map contents are defined
 * in the corresponding source file.
 */
class jerrno
{
private:
    static std::map<uint32_t, const char*> _err_map;               ///< Error code -> message text
    static std::map<uint32_t, const char*>::iterator _err_map_itr; ///< Iterator over _err_map
    static bool _initialized;                                      ///< Dummy flag, used to initialise map

public:
    // ---- generic errors ----
    static const uint32_t JERR__MALLOC;             ///< Buffer memory allocation failed
    static const uint32_t JERR__UNDERFLOW;          ///< Underflow error
    static const uint32_t JERR__NINIT;              ///< Operation on uninitialized class
    static const uint32_t JERR__AIO;                ///< AIO failure
    static const uint32_t JERR__FILEIO;             ///< File read or write failure
    static const uint32_t JERR__RTCLOCK;            ///< Reading real-time clock failed
    static const uint32_t JERR__PTHREAD;            ///< pthread failure
    static const uint32_t JERR__TIMEOUT;            ///< Timeout waiting for an event
    static const uint32_t JERR__UNEXPRESPONSE;      ///< Unexpected response to call or event
    static const uint32_t JERR__RECNFOUND;          ///< Record not found
    static const uint32_t JERR__NOTIMPL;            ///< Not implemented
    static const uint32_t JERR__NULL;               ///< Operation on null pointer
    static const uint32_t JERR__SYMLINK;            ///< Symbolic link operation failed

    // ---- class jcntl ----
    static const uint32_t JERR_JCNTL_STOPPED;       ///< Operation on stopped journal
    static const uint32_t JERR_JCNTL_READONLY;      ///< Write operation on read-only journal
    static const uint32_t JERR_JCNTL_AIOCMPLWAIT;   ///< Timeout waiting for AIOs to complete
    static const uint32_t JERR_JCNTL_UNKNOWNMAGIC;  ///< Found record with unknown magic
    static const uint32_t JERR_JCNTL_NOTRECOVERED;  ///< Requires recover() to be called first
    static const uint32_t JERR_JCNTL_ENQSTATE;      ///< Read error: record not in ENQ state
    static const uint32_t JERR_JCNTL_INVALIDENQHDR; ///< Invalid ENQ header

    // ---- class jdir ----
    static const uint32_t JERR_JDIR_NOTDIR;         ///< Name exists but is not a directory
    static const uint32_t JERR_JDIR_MKDIR;          ///< Directory creation failed
    static const uint32_t JERR_JDIR_OPENDIR;        ///< Directory open failed
    static const uint32_t JERR_JDIR_READDIR;        ///< Directory read failed
    static const uint32_t JERR_JDIR_CLOSEDIR;       ///< Directory close failed
    static const uint32_t JERR_JDIR_RMDIR;          ///< Directory delete failed
    static const uint32_t JERR_JDIR_NOSUCHFILE;     ///< File does not exist
    static const uint32_t JERR_JDIR_FMOVE;          ///< File move failed
    static const uint32_t JERR_JDIR_STAT;           ///< File stat failed
    static const uint32_t JERR_JDIR_UNLINK;         ///< File delete failed
    static const uint32_t JERR_JDIR_BADFTYPE;       ///< Bad or unknown file type (stat mode)

    // ---- class JournalFile ----
    static const uint32_t JERR_JNLF_OPEN;           ///< Unable to open file for write
    static const uint32_t JERR_JNLF_CLOSE;          ///< Unable to close file
    static const uint32_t JERR_JNLF_FILEOFFSOVFL;   ///< Submitted offset increased past file size
    static const uint32_t JERR_JNLF_CMPLOFFSOVFL;   ///< Completed offset increased past submitted offset

    // ---- class LinearFileController ----
    static const uint32_t JERR_LFCR_SEQNUMNOTFOUND; ///< File sequence number not found

    // ---- class jrec, enq_rec, deq_rec, txn_rec ----
    static const uint32_t JERR_JREC_BADRECHDR;      ///< Invalid data record header
    static const uint32_t JERR_JREC_BADRECTAIL;     ///< Invalid data record tail

    // ---- class wmgr ----
    static const uint32_t JERR_WMGR_BADPGSTATE;     ///< Page buffer in illegal state
    static const uint32_t JERR_WMGR_BADDTOKSTATE;   ///< Data token in illegal state
    static const uint32_t JERR_WMGR_ENQDISCONT;     ///< New dtok enqueued while previous enqueue partly complete
    static const uint32_t JERR_WMGR_DEQDISCONT;     ///< New dtok dequeued while previous dequeue partly complete
    static const uint32_t JERR_WMGR_DEQRIDNOTENQ;   ///< Dequeue rid is not enqueued
    static const uint32_t JERR_WMGR_BADFH;          ///< Bad file handle
    static const uint32_t JERR_WMGR_NOTSBLKALIGNED; ///< Offset is not soft block (sblk)-aligned

    // ---- class RecoveryManager ----
    static const uint32_t JERR_RCVM_OPENRD;         ///< Unable to open file for read
    static const uint32_t JERR_RCVM_STREAMBAD;      ///< Read/write stream error
    static const uint32_t JERR_RCVM_READ;           ///< Read error: no or insufficient data to read
    static const uint32_t JERR_RCVM_WRITE;          ///< Write error
    static const uint32_t JERR_RCVM_NULLXID;        ///< Null XID when XID length non-null in header
    static const uint32_t JERR_RCVM_NOTDBLKALIGNED; ///< Offset is not data block (dblk)-aligned
    static const uint32_t JERR_RCVM_NULLFID;        ///< Null file ID (FID)
    static const uint32_t JERR_RCVM_INVALIDEFPID;   ///< Invalid EFP identity (partition/size)

    // ---- class data_tok ----
    static const uint32_t JERR_DTOK_ILLEGALSTATE;   ///< Attempted to change to illegal state
//  static const uint32_t JERR_DTOK_RIDNOTSET;      ///< Record ID not set

    // ---- class enq_map, txn_map ----
    static const uint32_t JERR_MAP_DUPLICATE;       ///< Attempted to insert using duplicate key
    static const uint32_t JERR_MAP_NOTFOUND;        ///< Key not found in map
    static const uint32_t JERR_MAP_LOCKED;          ///< rid locked by pending txn

    // ---- Empty File Pool (EFP) errors ----
    static const uint32_t JERR_EFP_BADPARTITIONNAME; ///< Partition name invalid or of value 0
    static const uint32_t JERR_EFP_BADEFPDIRNAME;    ///< Empty File Pool directory name invalid
    static const uint32_t JERR_EFP_BADPARTITIONDIR;  ///< Invalid partition directory
    static const uint32_t JERR_EFP_NOEFP;            ///< No EFP found for given partition and file size
    static const uint32_t JERR_EFP_EMPTY;            ///< EFP empty
    static const uint32_t JERR_EFP_LSTAT;            ///< lstat operation failed
    static const uint32_t JERR_EFP_BADFILETYPE;      ///< Bad file type
    static const uint32_t JERR_EFP_FOPEN;            ///< Unable to fopen file for write
    static const uint32_t JERR_EFP_FWRITE;           ///< Write failed
    static const uint32_t JERR_EFP_MKDIR;            ///< Directory creation failed

    // ---- negative returns used by some functions ----
    static const int32_t AIO_TIMEOUT;               ///< Timeout waiting for AIO return
    static const int32_t LOCK_TAKEN;                ///< Lock attempt failed: held by another thread

    /**
     * \brief Translate an error number into its message text.
     * \param err_no Error number to translate.
     * \returns Message text for err_no.
     */
    static const char* err_msg(const uint32_t err_no) throw ();

private:
    /**
     * \brief Static function which populates the error message map.
     */
    static bool __init();
};
+ * + */ + +#include "qpid/linearstore/journal/jexception.h" + +#include <iomanip> + +#define CATLEN(p) MAX_MSG_SIZE - std::strlen(p) - 1 + +namespace qpid { +namespace linearstore { +namespace journal { + +jexception::jexception() throw (): + std::exception(), + _err_code(0) +{ + format(); +} + +jexception::jexception(const uint32_t err_code) throw (): + std::exception(), + _err_code(err_code) +{ + format(); +} + +jexception::jexception(const char* additional_info) throw (): + std::exception(), + _err_code(0), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const std::string& additional_info) throw (): + std::exception(), + _err_code(0), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const char* additional_info) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const std::string& additional_info) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const char* throwing_class, + const char* throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const std::string& throwing_class, + const std::string& throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const char* additional_info, + const char* throwing_class, const char* throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::jexception(const uint32_t err_code, const std::string& 
additional_info, + const std::string& throwing_class, const std::string& throwing_fn) throw (): + std::exception(), + _err_code(err_code), + _additional_info(additional_info), + _throwing_class(throwing_class), + _throwing_fn(throwing_fn) +{ + format(); +} + +jexception::~jexception() throw () +{} + +void +jexception::format() +{ + const bool ai = !_additional_info.empty(); + const bool tc = !_throwing_class.empty(); + const bool tf = !_throwing_fn.empty(); + std::ostringstream oss; + oss << "jexception 0x" << std::hex << std::setfill('0') << std::setw(4) << _err_code << " "; + if (tc) + { + oss << _throwing_class; + if (tf) + oss << "::"; + else + oss << " "; + } + if (tf) + oss << _throwing_fn << "() "; + if (tc || tf) + oss << "threw " << jerrno::err_msg(_err_code); + if (ai) + oss << " (" << _additional_info << ")"; + _what.assign(oss.str()); +} + +const char* +jexception::what() const throw () +{ + return _what.c_str(); +} + +std::ostream& +operator<<(std::ostream& os, const jexception& je) +{ + os << je.what(); + return os; +} + +std::ostream& +operator<<(std::ostream& os, const jexception* jePtr) +{ + os << jePtr->what(); + return os; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/jexception.h b/qpid/cpp/src/qpid/linearstore/journal/jexception.h new file mode 100644 index 0000000000..d03ee32e3f --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/jexception.h @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
// Macro for formatting common system errors: streams " errno=<n> (<strerror text>)".
// The parameter is deliberately not called `errno` (that name is itself a macro in
// <cerrno>). Note that the argument is evaluated twice, so pass a plain value.
#define FORMAT_SYSERR(err) " errno=" << (err) << " (" << std::strerror(err) << ")"

// Checks the result of a malloc-style allocation made inside a class that provides
// clean(). On a null pointer it releases what the object already holds via clean() and
// throws jexception(JERR__MALLOC). errno is captured first because clean() may call
// library functions that overwrite it.
//   ptr - pointer returned by the allocation
//   var - name of the variable being allocated (for the message)
//   cls - name of the calling class, fn - name of the calling function
#define MALLOC_CHK(ptr, var, cls, fn) if((ptr) == 0) { \
        const int malloc_chk_errno_ = errno; \
        clean(); \
        std::ostringstream oss; \
        oss << var << ": malloc() failed: " << FORMAT_SYSERR(malloc_chk_errno_); \
        throw jexception(jerrno::JERR__MALLOC, oss.str(), cls, fn); \
    }

// TODO: The following is a temporary bug-tracking aid which forces a core.
// Replace with the commented out version below when BZ484048 is resolved.
//
// Checks the return code of a pthread call; a non-zero code is reported through
// perror() and the process is aborted.
// `err` is evaluated exactly once. Call sites pass the pthread call itself as `err`,
// so evaluating it a second time inside the body would re-issue the lock/unlock.
#define PTHREAD_CHK(err, pfn, cls, fn) if(const int pthread_chk_err_ = (err)) { \
        std::ostringstream oss; \
        oss << cls << "::" << fn << "(): " << pfn; \
        errno = pthread_chk_err_; \
        ::perror(oss.str().c_str()); \
        ::abort(); \
    }
/*
#define PTHREAD_CHK(err, pfn, cls, fn) if(err != 0) { \
        std::ostringstream oss; \
        oss << pfn << " failed: " << FORMAT_SYSERR(err); \
        throw jexception(jerrno::JERR__PTHREAD, oss.str(), cls, fn); \
    }
*/

// Aborts the process after printing msg to std::cerr when cond is false. The condition
// is parenthesised as a whole so that compound expressions (a && b, c ? x : y, ...) are
// tested as written. Users of this macro must have <iostream> in scope.
#define ASSERT(cond, msg) if(!(cond)) { \
        std::cerr << msg << std::endl; \
        ::abort(); \
    }

namespace qpid {
namespace linearstore {
namespace journal {

    /**
     * \class jexception
     * \brief Generic journal exception class
     *
     * Carries a journal error code (see jerrno) together with optional free-form
     * additional information and the name of the class and function that reported
     * the error. The text returned by what() is assembled once, at construction.
     */
    class jexception : public std::exception
    {
    private:
        uint32_t _err_code;             ///< Journal error code (0 when none was given)
        std::string _additional_info;   ///< Optional free-form detail text
        std::string _throwing_class;    ///< Optional name of the reporting class
        std::string _throwing_fn;       ///< Optional name of the reporting function
        std::string _what;              ///< Pre-formatted message returned by what()
        void format();                  ///< Assemble _what from the fields above

    public:
        jexception() throw ();

        jexception(const uint32_t err_code) throw ();

        jexception(const char* additional_info) throw ();
        jexception(const std::string& additional_info) throw ();

        jexception(const uint32_t err_code, const char* additional_info) throw ();
        jexception(const uint32_t err_code, const std::string& additional_info) throw ();

        jexception(const uint32_t err_code, const char* throwing_class, const char* throwing_fn)
                throw ();
        jexception(const uint32_t err_code, const std::string& throwing_class,
                const std::string& throwing_fn) throw ();

        jexception(const uint32_t err_code, const char* additional_info,
                const char* throwing_class, const char* throwing_fn) throw ();
        jexception(const uint32_t err_code, const std::string& additional_info,
                const std::string& throwing_class, const std::string& throwing_fn) throw ();

        virtual ~jexception() throw ();
        virtual const char* what() const throw (); // override std::exception::what()

        inline uint32_t err_code() const throw () { return _err_code; }
        inline const std::string additional_info() const throw () { return _additional_info; }
        inline const std::string throwing_class() const throw () { return _throwing_class; }
        inline const std::string throwing_fn() const throw () { return _throwing_fn; }

        friend std::ostream& operator<<(std::ostream& os, const jexception& je);
        friend std::ostream& operator<<(std::ostream& os, const jexception* jePtr);
    }; // class jexception

}}}
+*/ +class jrec +{ +public: + jrec() {} + virtual ~jrec() {} + + /** + * \brief Encode this instance of jrec into the write buffer at the disk-block-aligned + * pointer wptr starting at position rec_offs_dblks in the encoded record to a + * maximum size of max_size_dblks. + * + * This call encodes the content of the data contianed in this instance of jrec into a + * disk-softblock-aligned (defined by JRNL_SBLK_SIZE) buffer pointed to by parameter + * wptr. No more than paramter max_size_dblks data-blocks may be written to the buffer. + * The parameter rec_offs_dblks is the offset in data-blocks within the fully encoded + * data block this instance represents at which to start encoding. + * + * Encoding entails writing the record header (struct enq_hdr), the data and the record tail + * (struct enq_tail). The record must be data-block-aligned (defined by JRNL_DBLK_SIZE), + * thus any remaining space in the final data-block is ignored; the returned value is the + * number of data-blocks consumed from the page by the encode action. Provided the initial + * alignment requirements are met, records may be of arbitrary size and may span multiple + * data-blocks, disk-blocks and/or pages. + * + * Since the record size in data-blocks is known, the general usage pattern is to call + * encode() as many times as is needed to fully encode the data. Each call to encode() + * will encode as much of the record as it can to what remains of the current page cache, + * and will return the number of data-blocks actually encoded. + * + * <b>Example:</b> Assume that record r1 was previously written to page 0, and that this + * is an instance representing record r2. Being larger than the page size ps, r2 would span + * multiple pages as follows: + * <pre> + * |<---ps--->| + * +----------+----------+----------+----... + * | |r2a| r2b | r2c | | + * |<-r1-><----------r2----------> | + * +----------+----------+----------+----... 
+ * page: p0 p1 p2 + * </pre> + * Encoding record r2 will require multiple calls to encode; one for each page which + * is involved. Record r2 is divided logically into sections r2a, r2b and r2c at the + * points where the page boundaries intersect with the record. Assuming a page size + * of ps, the page boundary pointers are represented by their names p0, p1... and the + * sizes of the record segments are represented by their names r1, r2a, r2b..., the calls + * should be as follows: + * <pre> + * encode(p0+r1, 0, ps-r1); (returns r2a data-blocks) + * encode(p1, r2a, ps); (returns r2b data-blocks which equals ps) + * encode(p2, r2a+r2b, ps); (returns r2c data-blocks) + * </pre> + * + * \param wptr Data-block-aligned pointer to position in page buffer where encoding is to + * take place. + * \param rec_offs_dblks Offset in data-blocks within record from which to start encoding. + * \param max_size_dblks Maximum number of data-blocks to write to pointer wptr. + * \returns Number of data-blocks encoded. 
+ */ + virtual uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum) = 0; + virtual bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start) = 0; + + virtual std::string& str(std::string& str) const = 0; + virtual std::size_t data_size() const = 0; + virtual std::size_t xid_size() const = 0; + virtual std::size_t rec_size() const = 0; + inline virtual uint32_t rec_size_dblks() const { return size_dblks(rec_size()); } + static inline uint32_t size_dblks(const std::size_t size) + { return size_blks(size, QLS_DBLK_SIZE_BYTES); } + static inline uint32_t size_sblks(const std::size_t size) + { return size_blks(size, QLS_SBLK_SIZE_BYTES); } + static inline uint32_t size_blks(const std::size_t size, const std::size_t blksize) + { return (size + blksize - 1)/blksize; } + virtual uint64_t rid() const = 0; + +protected: + virtual void clean() = 0; +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JRNL_JREC_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp b/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp new file mode 100644 index 0000000000..764beaa879 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/pmgr.cpp @@ -0,0 +1,192 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/pmgr.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +pmgr::page_cb::page_cb(uint16_t index): + _index(index), + _state(UNUSED), + _frid(0), + _wdblks(0), + _pdtokl(0), + _jfp(0), + _pbuff(0) +{} + +// TODO: almost identical to pmgr::page_state_str() below - resolve +const char* +pmgr::page_cb::state_str() const +{ + switch(_state) + { + case UNUSED: + return "UNUSED"; + case IN_USE: + return "IN_USE"; + case AIO_PENDING: + return "AIO_PENDING"; + } + return "<unknown>"; +} + +// static +const uint32_t pmgr::_sblkSizeBytes = QLS_SBLK_SIZE_BYTES; + +pmgr::pmgr(jcntl* jc, enq_map& emap, txn_map& tmap): + _cache_pgsize_sblks(0), + _cache_num_pages(0), + _jc(jc), + _emap(emap), + _tmap(tmap), + _page_base_ptr(0), + _page_ptr_arr(0), + _page_cb_arr(0), + _aio_cb_arr(0), + _aio_event_arr(0), + _ioctx(0), + _pg_index(0), + _pg_cntr(0), + _pg_offset_dblks(0), + _aio_evt_rem(0), + _cbp(0), + _enq_rec(), + _deq_rec(), + _txn_rec() +{} + +pmgr::~pmgr() +{ + pmgr::clean(); +} + +void +pmgr::initialize(aio_callback* const cbp, const uint32_t cache_pgsize_sblks, const uint16_t cache_num_pages) +{ + // As static use of this class keeps old values around, clean up first... + pmgr::clean(); + _pg_index = 0; + _pg_cntr = 0; + _pg_offset_dblks = 0; + _aio_evt_rem = 0; + _cache_pgsize_sblks = cache_pgsize_sblks; + _cache_num_pages = cache_num_pages; + _cbp = cbp; + + // 1. 
Allocate page memory (as a single block) + std::size_t cache_pgsize = _cache_num_pages * _cache_pgsize_sblks * _sblkSizeBytes; + if (::posix_memalign(&_page_base_ptr, QLS_AIO_ALIGN_BOUNDARY_BYTES, cache_pgsize)) + { + clean(); + std::ostringstream oss; + oss << "posix_memalign(): alignment=" << QLS_AIO_ALIGN_BOUNDARY_BYTES << " size=" << cache_pgsize; + oss << FORMAT_SYSERR(errno); + throw jexception(jerrno::JERR__MALLOC, oss.str(), "pmgr", "initialize"); + } + + // 2. Allocate array of page pointers + _page_ptr_arr = (void**)std::malloc(_cache_num_pages * sizeof(void*)); + MALLOC_CHK(_page_ptr_arr, "_page_ptr_arr", "pmgr", "initialize"); + + // 3. Allocate and initialize page control block (page_cb) array + _page_cb_arr = (page_cb*)std::malloc(_cache_num_pages * sizeof(page_cb)); + MALLOC_CHK(_page_cb_arr, "_page_cb_arr", "pmgr", "initialize"); + std::memset(_page_cb_arr, 0, _cache_num_pages * sizeof(page_cb)); + + // 4. Allocate IO control block (iocb) array + _aio_cb_arr = (aio_cb*)std::malloc(_cache_num_pages * sizeof(aio_cb)); + MALLOC_CHK(_aio_cb_arr, "_aio_cb_arr", "pmgr", "initialize"); + + // 5. Set page pointers in _page_ptr_arr, _page_cb_arr and iocbs to pages within page block + for (uint16_t i=0; i<_cache_num_pages; i++) + { + _page_ptr_arr[i] = (void*)((char*)_page_base_ptr + _cache_pgsize_sblks * _sblkSizeBytes * i); + _page_cb_arr[i]._index = i; + _page_cb_arr[i]._state = UNUSED; + _page_cb_arr[i]._pbuff = _page_ptr_arr[i]; + _page_cb_arr[i]._pdtokl = new std::deque<data_tok*>; + _page_cb_arr[i]._pdtokl->clear(); + _aio_cb_arr[i].data = (void*)&_page_cb_arr[i]; + } + + // 6. Allocate io_event array, max one event per cache page plus one for each file + const uint16_t max_aio_evts = _cache_num_pages + 1; // One additional event for file header writes + _aio_event_arr = (aio_event*)std::malloc(max_aio_evts * sizeof(aio_event)); + MALLOC_CHK(_aio_event_arr, "_aio_event_arr", "pmgr", "initialize"); + + // 7. 
Initialize AIO context + if (int ret = aio::queue_init(max_aio_evts, &_ioctx)) + { + std::ostringstream oss; + oss << "io_queue_init() failed: " << FORMAT_SYSERR(-ret); + throw jexception(jerrno::JERR__AIO, oss.str(), "pmgr", "initialize"); + } +} + +void +pmgr::clean() +{ + // Clean up allocated memory here + + if (_ioctx) + aio::queue_release(_ioctx); + + std::free(_page_base_ptr); + _page_base_ptr = 0; + + if (_page_cb_arr) + { + for (int i=0; i<_cache_num_pages; i++) + delete _page_cb_arr[i]._pdtokl; + std::free(_page_ptr_arr); + _page_ptr_arr = 0; + } + + std::free(_page_cb_arr); + _page_cb_arr = 0; + + std::free(_aio_cb_arr); + _aio_cb_arr = 0; + + std::free(_aio_event_arr); + _aio_event_arr = 0; +} + +// TODO: almost identical to pmgr::page_cb::state_str() above - resolve +const char* +pmgr::page_state_str(page_state ps) +{ + switch (ps) + { + case UNUSED: + return "UNUSED"; + case IN_USE: + return "IN_USE"; + case AIO_PENDING: + return "AIO_PENDING"; + } + return "<page_state unknown>"; +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/pmgr.h b/qpid/cpp/src/qpid/linearstore/journal/pmgr.h new file mode 100644 index 0000000000..e618397647 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/pmgr.h @@ -0,0 +1,119 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_PMGR_H +#define QPID_LINEARSTORE_JOURNAL_PMGR_H + +#include <deque> +#include "qpid/linearstore/journal/aio.h" +#include "qpid/linearstore/journal/deq_rec.h" +#include "qpid/linearstore/journal/enq_map.h" +#include "qpid/linearstore/journal/enq_rec.h" +#include "qpid/linearstore/journal/txn_map.h" +#include "qpid/linearstore/journal/txn_rec.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +class aio_callback; +class data_tok; +class jcntl; +class JournalFile; + +/** +* \brief Abstract class for managing either read or write page cache of arbitrary size and +* number of cache_num_pages. +*/ +class pmgr +{ +public: + /** + * \brief Enumeration of possible stats of a page within a page cache. + */ + enum page_state + { + UNUSED, ///< A page is uninitialized, contains no data. + IN_USE, ///< Page is in use. + AIO_PENDING ///< An AIO request outstanding. + }; + + /** + * \brief Page control block, carries control and state information for each page in the + * cache. 
+ */ + struct page_cb + { + uint16_t _index; ///< Index of this page + page_state _state; ///< Status of page + uint64_t _frid; ///< First rid in page (used for fhdr init) + uint32_t _wdblks; ///< Total number of dblks in page so far + std::deque<data_tok*>* _pdtokl; ///< Page message tokens list + JournalFile* _jfp; ///< Journal file for incrementing compl counts + void* _pbuff; ///< Page buffer + + page_cb(uint16_t index); ///< Convenience constructor + const char* state_str() const; ///< Return state as string for this pcb + }; + +protected: + static const uint32_t _sblkSizeBytes; ///< Disk softblock size + uint32_t _cache_pgsize_sblks; ///< Size of page cache cache_num_pages + uint16_t _cache_num_pages; ///< Number of page cache cache_num_pages + jcntl* _jc; ///< Pointer to journal controller + enq_map& _emap; ///< Ref to enqueue map + txn_map& _tmap; ///< Ref to transaction map + void* _page_base_ptr; ///< Base pointer to page memory + void** _page_ptr_arr; ///< Array of pointers to cache_num_pages in page memory + page_cb* _page_cb_arr; ///< Array of page_cb structs + aio_cb* _aio_cb_arr; ///< Array of iocb structs + aio_event* _aio_event_arr; ///< Array of io_events + io_context_t _ioctx; ///< AIO context for read/write operations + uint16_t _pg_index; ///< Index of current page being used + uint32_t _pg_cntr; ///< Page counter; determines if file rotation req'd + uint32_t _pg_offset_dblks; ///< Page offset (used so far) in data blocks + uint32_t _aio_evt_rem; ///< Remaining AIO events + aio_callback* _cbp; ///< Pointer to callback object + + enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding + deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding + txn_rec _txn_rec; ///< Transaction record used for encoding/decoding + +public: + pmgr(jcntl* jc, enq_map& emap, txn_map& tmap); + virtual ~pmgr(); + + virtual int32_t get_events(timespec* const timeout, bool flush) = 0; + inline uint32_t get_aio_evt_rem() const { return _aio_evt_rem; } + 
static const char* page_state_str(page_state ps); + inline uint32_t cache_pgsize_sblks() const { return _cache_pgsize_sblks; } + inline uint16_t cache_num_pages() const { return _cache_num_pages; } + +protected: + virtual void initialize(aio_callback* const cbp, const uint32_t cache_pgsize_sblks, + const uint16_t cache_num_pages); + virtual void rotate_page() = 0; + virtual void clean(); +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_PMGR_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/slock.h b/qpid/cpp/src/qpid/linearstore/journal/slock.h new file mode 100644 index 0000000000..12e9e2d08c --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/slock.h @@ -0,0 +1,71 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_SLOCK_H +#define QPID_LINEARSTORE_JOURNAL_SLOCK_H + +#include "qpid/linearstore/journal/smutex.h" +#include <pthread.h> + +namespace qpid { +namespace linearstore { +namespace journal { + +// Ultra-simple scoped lock class, auto-releases mutex when it goes out-of-scope +class slock +{ +protected: + const smutex& _sm; +public: + inline slock(const smutex& sm) : _sm(sm) + { + PTHREAD_CHK(::pthread_mutex_lock(_sm.get()), "::pthread_mutex_lock", "slock", "slock"); + } + inline ~slock() + { + PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "slock", "~slock"); + } +}; + +// Ultra-simple scoped try-lock class, auto-releases mutex when it goes out-of-scope +class stlock +{ +protected: + const smutex& _sm; + bool _locked; +public: + inline stlock(const smutex& sm) : _sm(sm), _locked(false) + { + int ret = ::pthread_mutex_trylock(_sm.get()); + _locked = (ret == 0); // check if lock obtained + if (!_locked && ret != EBUSY) PTHREAD_CHK(ret, "::pthread_mutex_trylock", "stlock", "stlock"); + } + inline ~stlock() + { + if (_locked) + PTHREAD_CHK(::pthread_mutex_unlock(_sm.get()), "::pthread_mutex_unlock", "stlock", "~stlock"); + } + inline bool locked() const { return _locked; } +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_SLOCK_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/smutex.h b/qpid/cpp/src/qpid/linearstore/journal/smutex.h new file mode 100644 index 0000000000..b43f55944c --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/smutex.h @@ -0,0 +1,51 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_SMUTEX_H +#define QPID_LINEARSTORE_JOURNAL_SMUTEX_H + +#include "qpid/linearstore/journal/jexception.h" +#include <pthread.h> + +namespace qpid { +namespace linearstore { +namespace journal { + + // Ultra-simple scoped mutex class that allows a posix mutex to be initialized and destroyed with error checks + class smutex + { + protected: + mutable pthread_mutex_t _m; + public: + inline smutex() + { + PTHREAD_CHK(::pthread_mutex_init(&_m, 0), "::pthread_mutex_init", "smutex", "smutex"); + } + inline virtual ~smutex() + { + PTHREAD_CHK(::pthread_mutex_destroy(&_m), "::pthread_mutex_destroy", "smutex", "~smutex"); + } + inline pthread_mutex_t* get() const { return &_m; } + }; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_SMUTEX_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp b/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp new file mode 100644 index 0000000000..39f2cd1d88 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/time_ns.cpp @@ -0,0 +1,41 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/time_ns.h" + +#include <sstream> + +namespace qpid { +namespace linearstore { +namespace journal { + +const std::string +time_ns::str(int precision) const +{ + const double t = tv_sec + (tv_nsec/1e9); + std::ostringstream oss; + oss.setf(std::ios::fixed, std::ios::floatfield); + oss.precision(precision); + oss << t; + return oss.str(); +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/time_ns.h b/qpid/cpp/src/qpid/linearstore/journal/time_ns.h new file mode 100644 index 0000000000..a228d47475 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/time_ns.h @@ -0,0 +1,92 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
// File: qpid/cpp/src/qpid/linearstore/journal/time_ns.h

#ifndef QPID_LINEARSTORE_JOURNAL_TIME_NS_H
#define QPID_LINEARSTORE_JOURNAL_TIME_NS_H

#include <cerrno>
#include <ctime>
#include <string>

namespace qpid {
namespace linearstore {
namespace journal {

/**
 * \brief A timespec with constructors plus arithmetic and comparison operators.
 *
 * After every arithmetic operator tv_nsec is brought back into the range
 * [0, 1000000000) by carrying or borrowing whole seconds to or from tv_sec,
 * however many seconds that takes. All comparison operators and the binary
 * + and - operators are const, so they may be used on const objects.
 */
struct time_ns : public timespec
{
    inline time_ns() { tv_sec = 0; tv_nsec = 0; }
    inline time_ns(const std::time_t sec, const long nsec = 0) { tv_sec = sec; tv_nsec = nsec; }
    inline time_ns(const time_ns& t) { tv_sec = t.tv_sec; tv_nsec = t.tv_nsec; }

    inline void set_zero() { tv_sec = 0; tv_nsec = 0; }
    inline bool is_zero() const { return tv_sec == 0 && tv_nsec == 0; }
    /// Set to the current CLOCK_REALTIME time. \return 0 on success, otherwise errno
    inline int now() { if(::clock_gettime(CLOCK_REALTIME, this)) return errno; return 0; }
    /// Seconds as a fixed-point decimal string with \a precision fractional digits
    const std::string str(int precision = 6) const;

    inline time_ns& operator=(const time_ns& rhs)
        { tv_sec = rhs.tv_sec; tv_nsec = rhs.tv_nsec; return *this; }
    inline time_ns& operator+=(const time_ns& rhs)
    {
        tv_sec += rhs.tv_sec;
        tv_nsec += rhs.tv_nsec;
        normalize();
        return *this;
    }
    /// Add \a ns nanoseconds; \a ns may be negative or larger than one second
    inline time_ns& operator+=(const long ns)
    {
        tv_nsec += ns;
        normalize();
        return *this;
    }
    /// Subtract \a ns nanoseconds; \a ns may be negative or larger than one second
    inline time_ns& operator-=(const long ns)
    {
        tv_nsec -= ns;
        normalize();
        return *this;
    }
    inline time_ns& operator-=(const time_ns& rhs)
    {
        tv_sec -= rhs.tv_sec;
        tv_nsec -= rhs.tv_nsec;
        normalize();
        return *this;
    }
    inline const time_ns operator+(const time_ns& rhs) const
        { time_ns t(*this); t += rhs; return t; }
    inline const time_ns operator-(const time_ns& rhs) const
        { time_ns t(*this); t -= rhs; return t; }
    inline bool operator==(const time_ns& rhs) const
       { return tv_sec == rhs.tv_sec && tv_nsec == rhs.tv_nsec; }
    inline bool operator!=(const time_ns& rhs) const
       { return tv_sec != rhs.tv_sec || tv_nsec != rhs.tv_nsec; }
    inline bool operator>(const time_ns& rhs) const
       { if(tv_sec == rhs.tv_sec) return tv_nsec > rhs.tv_nsec; return tv_sec > rhs.tv_sec; }
    inline bool operator>=(const time_ns& rhs) const
       { if(tv_sec == rhs.tv_sec) return tv_nsec >= rhs.tv_nsec; return tv_sec >= rhs.tv_sec; }
    inline bool operator<(const time_ns& rhs) const
       { if(tv_sec == rhs.tv_sec) return tv_nsec < rhs.tv_nsec; return tv_sec < rhs.tv_sec; }
    inline bool operator<=(const time_ns& rhs) const
       { if(tv_sec == rhs.tv_sec) return tv_nsec <= rhs.tv_nsec; return tv_sec <= rhs.tv_sec; }

private:
    /// Move whole seconds between tv_nsec and tv_sec so that 0 <= tv_nsec < 1e9
    inline void normalize()
    {
        const long ns_per_sec = 1000000000L;
        long carry = tv_nsec / ns_per_sec;
        long rem = tv_nsec % ns_per_sec;
        if (rem < 0) // remainder takes the sign of tv_nsec; borrow one more second
        {
            rem += ns_per_sec;
            --carry;
        }
        tv_sec += carry;
        tv_nsec = rem;
    }
};

}}}

#endif // ifndef QPID_LINEARSTORE_JOURNAL_TIME_NS_H
+ * + */ + +#include "qpid/linearstore/journal/txn_map.h" + +#include "qpid/linearstore/journal/slock.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +// return/error codes +int16_t txn_map::TMAP_RID_NOT_FOUND = -2; +int16_t txn_map::TMAP_XID_NOT_FOUND = -1; +int16_t txn_map::TMAP_OK = 0; +int16_t txn_map::TMAP_NOT_SYNCED = 0; +int16_t txn_map::TMAP_SYNCED = 1; + +txn_data_t::txn_data_t(const uint64_t rid, + const uint64_t drid, + const uint64_t fid, + const uint64_t foffs, + const bool enq_flag, + const bool tpc_flag, + const bool commit_flag): + rid_(rid), + drid_(drid), + fid_(fid), + foffs_(foffs), + enq_flag_(enq_flag), + tpc_flag_(tpc_flag), + commit_flag_(commit_flag), + aio_compl_(false) +{} + +txn_op_stats_t::txn_op_stats_t(const txn_data_list_t& tdl) : + enqCnt(0U), + deqCnt(0U), + tpcCnt(0U), + abortCnt(0U), + commitCnt(0U), + rid(0ULL) +{ + for (tdl_const_itr_t i=tdl.begin(); i!=tdl.end(); ++i) { + if (i->enq_flag_) { + ++enqCnt; + rid = i->rid_; + } else { + ++deqCnt; + if (i->commit_flag_) { + ++commitCnt; + } else { + ++abortCnt; + } + } + if (i->tpc_flag_) { + ++tpcCnt; + } + } + if (tpcCnt > 0 && tpcCnt != tdl.size()) { + throw jexception("Inconsistent 2PC count"); // TODO: complete exception details + } + if (abortCnt > 0 && commitCnt > 0) { + throw jexception("Both abort and commit in same transaction"); // TODO: complete exception details + } +} + +txn_map::txn_map(): + _map()/*, + _pfid_txn_cnt()*/ +{} + +txn_map::~txn_map() {} + +bool +txn_map::insert_txn_data(const std::string& xid, const txn_data_t& td) +{ + bool ok = true; + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + { + txn_data_list_t list; + list.push_back(td); + std::pair<xmap_itr, bool> ret = _map.insert(xmap_param(xid, list)); + if (!ret.second) // duplicate + ok = false; + } + else + itr->second.push_back(td); + return ok; +} + +const txn_data_list_t +txn_map::get_tdata_list(const std::string& xid) +{ + 
slock s(_mutex); + return get_tdata_list_nolock(xid); +} + +const txn_data_list_t +txn_map::get_tdata_list_nolock(const std::string& xid) +{ + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + return _empty_data_list; + return itr->second; +} + +const txn_data_list_t +txn_map::get_remove_tdata_list(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + return _empty_data_list; + txn_data_list_t list = itr->second; + _map.erase(itr); + return list; +} + +bool +txn_map::in_map(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr= _map.find(xid); + return itr != _map.end(); +} + +uint32_t +txn_map::enq_cnt() +{ + return cnt(true); +} + +uint32_t +txn_map::deq_cnt() +{ + return cnt(true); +} + +uint32_t +txn_map::cnt(const bool enq_flag) +{ + slock s(_mutex); + uint32_t c = 0; + for (xmap_itr i = _map.begin(); i != _map.end(); i++) + { + for (tdl_itr_t j = i->second.begin(); j < i->second.end(); j++) + { + if (j->enq_flag_ == enq_flag) + c++; + } + } + return c; +} + +int16_t +txn_map::is_txn_synced(const std::string& xid) +{ + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // not found in map + return TMAP_XID_NOT_FOUND; + bool is_synced = true; + for (tdl_itr_t litr = itr->second.begin(); litr < itr->second.end(); litr++) + { + if (!litr->aio_compl_) + { + is_synced = false; + break; + } + } + return is_synced ? 
TMAP_SYNCED : TMAP_NOT_SYNCED; +} + +int16_t +txn_map::set_aio_compl(const std::string& xid, const uint64_t rid) +{ + slock s(_mutex); + xmap_itr itr = _map.find(xid); + if (itr == _map.end()) // xid not found in map + return TMAP_XID_NOT_FOUND; + for (tdl_itr_t litr = itr->second.begin(); litr < itr->second.end(); litr++) + { + if (litr->rid_ == rid) + { + litr->aio_compl_ = true; + return TMAP_OK; // rid found + } + } + // xid present, but rid not found + return TMAP_RID_NOT_FOUND; +} + +bool +txn_map::data_exists(const std::string& xid, const uint64_t rid) +{ + bool found = false; + { + slock s(_mutex); + txn_data_list_t tdl = get_tdata_list_nolock(xid); + tdl_itr_t itr = tdl.begin(); + while (itr != tdl.end() && !found) + { + found = itr->rid_ == rid; + itr++; + } + } + return found; +} + +bool +txn_map::is_enq(const uint64_t rid) +{ + bool found = false; + { + slock s(_mutex); + for (xmap_itr i = _map.begin(); i != _map.end() && !found; i++) + { + txn_data_list_t list = i->second; + for (tdl_itr_t j = list.begin(); j < list.end() && !found; j++) + { + if (j->enq_flag_) + found = j->rid_ == rid; + else + found = j->drid_ == rid; + } + } + } + return found; +} + +void +txn_map::xid_list(std::vector<std::string>& xv) +{ + xv.clear(); + { + slock s(_mutex); + for (xmap_itr itr = _map.begin(); itr != _map.end(); itr++) + xv.push_back(itr->first); + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_map.h b/qpid/cpp/src/qpid/linearstore/journal/txn_map.h new file mode 100644 index 0000000000..e79c0522d8 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/txn_map.h @@ -0,0 +1,150 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_TXN_MAP_H +#define QPID_LINEARSTORE_JOURNAL_TXN_MAP_H + +#include "qpid/linearstore/journal/smutex.h" +#include <map> +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + + /** + * \struct txn_data_struct + * \brief Struct encapsulating transaction data necessary for processing a transaction + * in the journal once it is closed with either a commit or abort. 
+ */ + typedef struct txn_data_t + { + uint64_t rid_; ///< Record id for this operation + uint64_t drid_; ///< Dequeue record id for this operation + uint64_t fid_; ///< File seq number, to be used when transferring to emap on commit + uint64_t foffs_; ///< Offset in file for this record + bool enq_flag_; ///< If true, enq op, otherwise deq op + bool tpc_flag_; ///< 2PC transaction if true + bool commit_flag_; ///< TPL only: (2PC transactions) Records 2PC complete c/a mode + bool aio_compl_; ///< Initially false, set to true when record AIO returns + txn_data_t(const uint64_t rid, + const uint64_t drid, + const uint64_t fid, + const uint64_t foffs, + const bool enq_flag, + const bool tpc_flag, + const bool commit_flag); + } txn_data_t; + typedef std::vector<txn_data_t> txn_data_list_t; + typedef txn_data_list_t::iterator tdl_itr_t; + typedef txn_data_list_t::const_iterator tdl_const_itr_t; + + typedef struct txn_op_stats_t + { + uint16_t enqCnt; + uint16_t deqCnt; + uint16_t tpcCnt; + uint16_t abortCnt; + uint16_t commitCnt; + uint64_t rid; + txn_op_stats_t(const txn_data_list_t& tdl); + } txn_op_stats_t; + + /** + * \class txn_map + * \brief Class for storing transaction data for each open (ie not committed or aborted) + * xid in the store. If aborted, records are discarded; if committed, they are + * transferred to the enqueue map. + * + * The data is encapsulated by struct txn_data_struct. A vector containing the information + * for each operation included as part of the same transaction is mapped against the + * xid. + * + * The aio_compl flag is set true as each AIO write operation for the enqueue or dequeue + * returns. Checking that all of these flags are true for a given xid is the mechanism + * used to determine if the transaction is syncronized (through method is_txn_synced()). + * + * On transaction commit, then each operation is handled as follows: + * + * If an enqueue (_enq_flag is true), then the rid and pfid are transferred to the enq_map. 
+ * If a dequeue (_enq_flag is false), then the rid stored in the drid field is used to + * remove the corresponding record from the enq_map. + * + * On transaction abort, then each operation is handled as follows: + * + * If an enqueue (_enq_flag is true), then the data is simply discarded. + * If a dequeue (_enq_flag is false), then the lock for the corresponding enqueue in enq_map + * (if not a part of the same transaction) is removed, and the data discarded. + * + * <pre> + * key data + * + * xid1 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * xid2 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * xid3 --- vector< [ rid, drid, pfid, enq_flag, commit_flag, aio_compl ] > + * ... + * </pre> + */ + class txn_map + { + public: + // return/error codes + static int16_t TMAP_RID_NOT_FOUND; + static int16_t TMAP_XID_NOT_FOUND; + static int16_t TMAP_OK; + static int16_t TMAP_NOT_SYNCED; + static int16_t TMAP_SYNCED; + + private: + typedef std::pair<std::string, txn_data_list_t> xmap_param; + typedef std::map<std::string, txn_data_list_t> xmap; + typedef xmap::iterator xmap_itr; + + xmap _map; + smutex _mutex; + const txn_data_list_t _empty_data_list; + + public: + txn_map(); + virtual ~txn_map(); + + bool insert_txn_data(const std::string& xid, const txn_data_t& td); + const txn_data_list_t get_tdata_list(const std::string& xid); + const txn_data_list_t get_remove_tdata_list(const std::string& xid); + bool in_map(const std::string& xid); + uint32_t enq_cnt(); + uint32_t deq_cnt(); + int16_t is_txn_synced(const std::string& xid); // -1=xid not found; 0=not synced; 1=synced + int16_t set_aio_compl(const std::string& xid, const uint64_t rid); // -2=rid not found; -1=xid not found; 0=done + bool data_exists(const std::string& xid, const uint64_t rid); + bool is_enq(const uint64_t rid); + inline void clear() { _map.clear(); } + inline bool empty() const { return _map.empty(); } + inline size_t size() const { return _map.size(); } 
+ void xid_list(std::vector<std::string>& xv); + private: + uint32_t cnt(const bool enq_flag); + const txn_data_list_t get_tdata_list_nolock(const std::string& xid); + }; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_TXN_MAP_H diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp new file mode 100644 index 0000000000..298ab608b1 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.cpp @@ -0,0 +1,305 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "qpid/linearstore/journal/txn_rec.h" + +#include <cassert> +#include <cstring> +#include "qpid/linearstore/journal/Checksum.h" +#include "qpid/linearstore/journal/jexception.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +txn_rec::txn_rec(): + _xidp(0), + _xid_buff(0) +{ + ::txn_hdr_init(&_txn_hdr, 0, QLS_JRNL_VERSION, 0, 0, 0, 0); + ::rec_tail_init(&_txn_tail, 0, 0, 0, 0); +} + +txn_rec::~txn_rec() +{ + clean(); +} + +void +txn_rec::reset(const bool commitFlag, const uint64_t serial, const uint64_t rid, const void* const xidp, + const std::size_t xidlen) +{ + _txn_hdr._rhdr._magic = commitFlag ? 
QLS_TXC_MAGIC : QLS_TXA_MAGIC; + _txn_hdr._rhdr._serial = serial; + _txn_hdr._rhdr._rid = rid; + _txn_hdr._xidsize = xidlen; + _xidp = xidp; + _xid_buff = 0; + _txn_tail._xmagic = ~_txn_hdr._rhdr._magic; + _txn_tail._serial = serial; + _txn_tail._rid = rid; + _txn_tail._checksum = 0UL; +} + +uint32_t +txn_rec::encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum) +{ + assert(wptr != 0); + assert(max_size_dblks > 0); + assert(_xidp != 0 && _txn_hdr._xidsize > 0); + + std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t rem = max_size_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t wr_cnt = 0; + if (rec_offs_dblks) // Continuation of split dequeue record (over 2 or more pages) + { + if (size_dblks(rec_size()) - rec_offs_dblks > max_size_dblks) // Further split required + { + rec_offs -= sizeof(txn_hdr_t); + std::size_t wsize = _txn_hdr._xidsize > rec_offs ? _txn_hdr._xidsize - rec_offs : 0; + std::size_t wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= _txn_hdr._xidsize - wsize2; + checksum.addData((unsigned char*)wptr, wr_cnt); + if (rem) + { + _txn_tail._checksum = checksum.getChecksum(); + wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0; + wsize2 = wsize; + if (wsize) + { + if (wsize > rem) + wsize = rem; + std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize); + wr_cnt += wsize; + rem -= wsize; + } + rec_offs -= sizeof(_txn_tail) - wsize2; + } + assert(rem == 0); + assert(rec_offs == 0); + } + else // No further split required + { + rec_offs -= sizeof(txn_hdr_t); + std::size_t wsize = _txn_hdr._xidsize > rec_offs ? 
_txn_hdr._xidsize - rec_offs : 0; + if (wsize) + { + std::memcpy(wptr, (const char*)_xidp + rec_offs, wsize); + wr_cnt += wsize; + checksum.addData((unsigned char*)wptr, wr_cnt); + } + rec_offs -= _txn_hdr._xidsize - wsize; + _txn_tail._checksum = checksum.getChecksum(); + wsize = sizeof(_txn_tail) > rec_offs ? sizeof(_txn_tail) - rec_offs : 0; + if (wsize) + { + std::memcpy((char*)wptr + wr_cnt, (char*)&_txn_tail + rec_offs, wsize); + wr_cnt += wsize; +#ifdef QLS_CLEAN + std::size_t rec_offs = rec_offs_dblks * QLS_DBLK_SIZE_BYTES; + std::size_t dblk_rec_size = size_dblks(rec_size() - rec_offs) * QLS_DBLK_SIZE_BYTES; + std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + rec_offs -= sizeof(_txn_tail) - wsize; + assert(rec_offs == 0); + } + } + else // Start at beginning of data record + { + // Assumption: the header will always fit into the first dblk + std::memcpy(wptr, (void*)&_txn_hdr, sizeof(txn_hdr_t)); + wr_cnt = sizeof(txn_hdr_t); + if (size_dblks(rec_size()) > max_size_dblks) // Split required + { + std::size_t wsize; + rem -= sizeof(txn_hdr_t); + if (rem) + { + wsize = rem >= _txn_hdr._xidsize ? _txn_hdr._xidsize : rem; + std::memcpy((char*)wptr + wr_cnt, _xidp, wsize); + wr_cnt += wsize; + rem -= wsize; + } + checksum.addData((unsigned char*)wptr, wr_cnt); + if (rem) + { + _txn_tail._checksum = checksum.getChecksum(); + wsize = rem >= sizeof(_txn_tail) ? 
sizeof(_txn_tail) : rem; + std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, wsize); + wr_cnt += wsize; + rem -= wsize; + } + assert(rem == 0); + } + else // No split required + { + std::memcpy((char*)wptr + wr_cnt, _xidp, _txn_hdr._xidsize); + wr_cnt += _txn_hdr._xidsize; + checksum.addData((unsigned char*)wptr, wr_cnt); + _txn_tail._checksum = checksum.getChecksum(); + std::memcpy((char*)wptr + wr_cnt, (void*)&_txn_tail, sizeof(_txn_tail)); + wr_cnt += sizeof(_txn_tail); +#ifdef QLS_CLEAN + std::size_t dblk_rec_size = size_dblks(rec_size()) * QLS_DBLK_SIZE_BYTES; + std::memset((char*)wptr + wr_cnt, QLS_CLEAN_CHAR, dblk_rec_size - wr_cnt); +#endif + } + } + return size_dblks(wr_cnt); +} + +bool +txn_rec::decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start) +{ + if (rec_offs == 0) + { + // Read header, allocate for xid + ::rec_hdr_copy(&_txn_hdr._rhdr, &h); + ifsp->read((char*)&_txn_hdr._xidsize, sizeof(_txn_hdr._xidsize)); + rec_offs = sizeof(::txn_hdr_t); + _xid_buff = std::malloc(_txn_hdr._xidsize); + MALLOC_CHK(_xid_buff, "_buff", "txn_rec", "rcv_decode"); + } + if (rec_offs < sizeof(txn_hdr_t) + _txn_hdr._xidsize) + { + // Read xid (or continue reading xid) + std::size_t offs = rec_offs - sizeof(txn_hdr_t); + ifsp->read((char*)_xid_buff + offs, _txn_hdr._xidsize - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if (size_read < _txn_hdr._xidsize - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + } + if (rec_offs < sizeof(txn_hdr_t) + _txn_hdr._xidsize + sizeof(rec_tail_t)) + { + // Read tail (or continue reading tail) + std::size_t offs = rec_offs - sizeof(txn_hdr_t) - _txn_hdr._xidsize; + ifsp->read((char*)&_txn_tail + offs, sizeof(rec_tail_t) - offs); + std::size_t size_read = ifsp->gcount(); + rec_offs += size_read; + if 
(size_read < sizeof(rec_tail_t) - offs) + { + assert(ifsp->eof()); + // As we may have read past eof, turn off fail bit + ifsp->clear(ifsp->rdstate()&(~std::ifstream::failbit)); + assert(!ifsp->fail() && !ifsp->bad()); + return false; + } + check_rec_tail(rec_start); + } + ifsp->ignore(rec_size_dblks() * QLS_DBLK_SIZE_BYTES - rec_size()); + assert(!ifsp->fail() && !ifsp->bad()); + assert(_txn_hdr._xidsize > 0); + return true; +} + +std::size_t +txn_rec::get_xid(void** const xidpp) +{ + if (!_xid_buff) + { + *xidpp = 0; + return 0; + } + *xidpp = _xid_buff; + return _txn_hdr._xidsize; +} + +std::string& +txn_rec::str(std::string& str) const +{ + std::ostringstream oss; + if (_txn_hdr._rhdr._magic == QLS_TXA_MAGIC) + oss << "dtxa_rec: m=" << _txn_hdr._rhdr._magic; + else + oss << "dtxc_rec: m=" << _txn_hdr._rhdr._magic; + oss << " v=" << (int)_txn_hdr._rhdr._version; + oss << " rid=" << _txn_hdr._rhdr._rid; + oss << " xid=\"" << _xidp << "\""; + str.append(oss.str()); + return str; +} + +std::size_t +txn_rec::xid_size() const +{ + return _txn_hdr._xidsize; +} + +std::size_t +txn_rec::rec_size() const +{ + return sizeof(txn_hdr_t) + _txn_hdr._xidsize + sizeof(rec_tail_t); +} + +void +txn_rec::check_rec_tail(const std::streampos rec_start) const { + Checksum checksum; + checksum.addData((const unsigned char*)&_txn_hdr, sizeof(::txn_hdr_t)); + if (_txn_hdr._xidsize > 0) { + checksum.addData((const unsigned char*)_xid_buff, _txn_hdr._xidsize); + } + uint32_t cs = checksum.getChecksum(); + uint16_t res = ::rec_tail_check(&_txn_tail, &_txn_hdr._rhdr, cs); + if (res != 0) { + std::stringstream oss; + oss << std::endl << " Record offset: 0x" << std::hex << rec_start; + if (res & ::REC_TAIL_MAGIC_ERR_MASK) { + oss << std::endl << " Magic: expected 0x" << ~_txn_hdr._rhdr._magic << "; found 0x" << _txn_tail._xmagic; + } + if (res & ::REC_TAIL_SERIAL_ERR_MASK) { + oss << std::endl << " Serial: expected 0x" << _txn_hdr._rhdr._serial << "; found 0x" << _txn_tail._serial; + } + if 
(res & ::REC_TAIL_RID_ERR_MASK) { + oss << std::endl << " Record Id: expected 0x" << _txn_hdr._rhdr._rid << "; found 0x" << _txn_tail._rid; + } + if (res & ::REC_TAIL_CHECKSUM_ERR_MASK) { + oss << std::endl << " Checksum: expected 0x" << cs << "; found 0x" << _txn_tail._checksum; + } + throw jexception(jerrno::JERR_JREC_BADRECTAIL, oss.str(), "txn_rec", "check_rec_tail"); + } +} + +void +txn_rec::clean() +{ + if (_xid_buff) { + std::free(_xid_buff); + _xid_buff = 0; + } +} + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h new file mode 100644 index 0000000000..4552071595 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/txn_rec.h @@ -0,0 +1,68 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_TXN_REC_H +#define QPID_LINEARSTORE_JOURNAL_TXN_REC_H + +#include "qpid/linearstore/journal/jrec.h" +#include "qpid/linearstore/journal/utils/txn_hdr.h" +#include "qpid/linearstore/journal/utils/rec_tail.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +/** +* \class txn_rec +* \brief Class to handle a single journal commit or abort record. 
+*/
+class txn_rec : public jrec
+{
+private:
+    ::txn_hdr_t _txn_hdr;       ///< Local instance of transaction header struct
+    const void* _xidp;          ///< xid pointer for encoding (writing to disk)
+    void* _xid_buff;            ///< Pointer to buffer to receive xid read from disk
+    ::rec_tail_t _txn_tail;     ///< Local instance of record tail struct
+
+public:
+    txn_rec();
+    virtual ~txn_rec();
+
+    /// Prepare the record for encoding; commitFlag presumably selects commit vs abort (implementation not shown here).
+    void reset(const bool commitFlag, const uint64_t serial, const uint64_t rid, const void* const xidp,
+               const std::size_t xidlen);
+    /// Encode (part of) the record at wptr; sizes and offsets are expressed in dblks.
+    uint32_t encode(void* wptr, uint32_t rec_offs_dblks, uint32_t max_size_dblks, Checksum& checksum);
+    /// Decode the record from ifsp; returns true once the whole record has been read.
+    bool decode(::rec_hdr_t& h, std::ifstream* ifsp, std::size_t& rec_offs, const std::streampos rec_start);
+
+    /// Set *xidpp to the decoded xid buffer (0 if none) and return its size.
+    std::size_t get_xid(void** const xidpp);
+    /// Append a textual summary of the record to str and return str.
+    std::string& str(std::string& str) const;
+    inline std::size_t data_size() const { return 0; } // This record never carries data
+    /// xid size taken from the transaction header.
+    std::size_t xid_size() const;
+    /// Size of header + xid + tail, without dblk padding.
+    std::size_t rec_size() const;
+    inline uint64_t rid() const { return _txn_hdr._rhdr._rid; }
+    /// Validate the tail against the header and checksum; throws jexception on mismatch.
+    void check_rec_tail(const std::streampos rec_start) const;
+
+private:
+    /// Free the xid receive buffer.
+    virtual void clean();
+};
+
+}}}
+
+#endif // ifndef QPID_LINEARSTORE_JOURNAL_TXN_REC_H
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c
new file mode 100644
index 0000000000..b55c1c16c8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.c
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "deq_hdr.h"
+
+/*static const uint16_t DEQ_HDR_TXNCMPLCOMMIT_MASK = 0x10;*/
+
+/* Fill in a dequeue header: the dequeue-specific fields plus the embedded common record header. */
+void deq_hdr_init(deq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+                  const uint64_t serial, const uint64_t rid, const uint64_t deq_rid, const uint64_t xidsize) {
+    dest->_xidsize = xidsize;
+    dest->_deq_rid = deq_rid;
+    rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+}
+
+/* Field-by-field copy of a dequeue header from src to dest. */
+void deq_hdr_copy(deq_hdr_t* dest, const deq_hdr_t* src) {
+    dest->_xidsize = src->_xidsize;
+    dest->_deq_rid = src->_deq_rid;
+    rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+}
+
+/* True when the transaction-complete-commit bit is set in the user flags. */
+bool is_txn_coml_commit(const deq_hdr_t *dh) {
+    const uint16_t flag_bits = dh->_rhdr._uflag;
+    return (flag_bits & DEQ_HDR_TXNCMPLCOMMIT_MASK) != 0;
+}
+
+/* Set (commit == true) or clear (commit == false) the transaction-complete-commit bit. */
+void set_txn_coml_commit(deq_hdr_t *dh, const bool commit) {
+    if (commit) {
+        dh->_rhdr._uflag |= DEQ_HDR_TXNCMPLCOMMIT_MASK;     // set flag bit
+    } else {
+        dh->_rhdr._uflag &= ~DEQ_HDR_TXNCMPLCOMMIT_MASK;    // unset flag bit
+    }
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h
new file mode 100644
index 0000000000..3392867153
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/deq_hdr.h
@@ -0,0 +1,83 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include <stdbool.h> +#include "rec_hdr.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#pragma pack(1) + +/** + * \brief Struct for dequeue record. + * + * Struct for dequeue record. If this record has a non-zero xidsize field (i.e., there is a + * valid XID), then this header is followed by the XID of xidsize bytes and a rec_tail. If, + * on the other hand, this record has a zero xidsize (i.e., there is no XID), then the rec_tail + * is absent. + * + * Note that this record had its own rid distinct from the rid of the record it is dequeueing. + * The rid field below is the rid of the dequeue record itself; the deq-rid field is the rid of a + * previous enqueue record being dequeued by this record. 
+ *
+ * Record header info in binary format (40 bytes):
+ * <pre>
+ *   0                           7
+ * +---+---+---+---+---+---+---+---+  -+
+ * |     magic     |  ver  | flags |   |
+ * +---+---+---+---+---+---+---+---+   |
+ * |             serial            |   | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+   |
+ * |              rid              |   |
+ * +---+---+---+---+---+---+---+---+  -+
+ * |            deq-rid            |
+ * +---+---+---+---+---+---+---+---+
+ * |            xidsize            |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * deq-rid = dequeue record ID
+ * </pre>
+ */
+typedef struct deq_hdr_t {
+    rec_hdr_t _rhdr;        /**< Common record header struct */
+    uint64_t _deq_rid;      /**< Record ID of record being dequeued */
+    uint64_t _xidsize;      /**< XID size */
+} deq_hdr_t;
+
+/** Bit in rec_hdr_t::_uflag tested and changed by is_txn_coml_commit() / set_txn_coml_commit() */
+static const uint16_t DEQ_HDR_TXNCMPLCOMMIT_MASK = 0x10;
+
+/** Initialize all fields of a dequeue header, including the embedded rec_hdr_t */
+void deq_hdr_init(deq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+                  const uint64_t serial, const uint64_t rid, const uint64_t deq_rid, const uint64_t xidsize);
+/** Copy all fields of src into dest */
+void deq_hdr_copy(deq_hdr_t* dest, const deq_hdr_t* src);
+/** Return true if the DEQ_HDR_TXNCMPLCOMMIT_MASK bit is set in the header flags */
+bool is_txn_coml_commit(const deq_hdr_t *dh);
+/** Set (commit = true) or clear (commit = false) the DEQ_HDR_TXNCMPLCOMMIT_MASK bit */
+void set_txn_coml_commit(deq_hdr_t *dh, const bool commit);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_DEQ_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c
new file mode 100644
index 0000000000..b4e8b62ff1
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "enq_hdr.h"
+
+//static const uint16_t ENQ_HDR_TRANSIENT_MASK = 0x10;
+//static const uint16_t ENQ_HDR_EXTERNAL_MASK = 0x20;
+
+/** Initialize all fields of an enqueue header, including the embedded rec_hdr_t. */
+void enq_hdr_init(enq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+                  const uint64_t serial, const uint64_t rid, const uint64_t xidsize, const uint64_t dsize) {
+    rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+    dest->_xidsize = xidsize;
+    dest->_dsize = dsize;
+}
+
+/** Copy all fields of src into dest. */
+void enq_hdr_copy(enq_hdr_t* dest, const enq_hdr_t* src) {
+    rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+    dest->_xidsize = src->_xidsize;
+    dest->_dsize = src->_dsize;
+}
+
+/** Return true if the ENQ_HDR_TRANSIENT_MASK bit is set in the header flags. */
+bool is_enq_transient(const enq_hdr_t *eh) {
+    return eh->_rhdr._uflag & ENQ_HDR_TRANSIENT_MASK;
+}
+
+/** Set (transient = true) or clear (transient = false) the ENQ_HDR_TRANSIENT_MASK bit. */
+void set_enq_transient(enq_hdr_t *eh, const bool transient) {
+    eh->_rhdr._uflag = transient ? eh->_rhdr._uflag | ENQ_HDR_TRANSIENT_MASK :
+                                   eh->_rhdr._uflag & (~ENQ_HDR_TRANSIENT_MASK);
+}
+
+/** Return true if the ENQ_HDR_EXTERNAL_MASK bit is set in the header flags. */
+bool is_enq_external(const enq_hdr_t *eh) {
+    return eh->_rhdr._uflag & ENQ_HDR_EXTERNAL_MASK;
+}
+
+/** Set (external = true) or clear (external = false) the ENQ_HDR_EXTERNAL_MASK bit. */
+void set_enq_external(enq_hdr_t *eh, const bool external) {
+    eh->_rhdr._uflag = external ? eh->_rhdr._uflag | ENQ_HDR_EXTERNAL_MASK :
+                                  eh->_rhdr._uflag & (~ENQ_HDR_EXTERNAL_MASK);
+}
+
+/**
+ * Check that an enqueue header carries the expected magic, version and record id.
+ *
+ * \param eh      Header to validate.
+ * \param magic   Expected magic number.
+ * \param version Expected encoding version.
+ * \param rid     Expected record id; 0 means "do not compare record ids".
+ * \return true only if magic and version match and (rid == 0 or the header rid equals rid).
+ *
+ * The previous single-expression form, `a && b && rid > 0 ? x : true`, was parsed as
+ * `(a && b && rid > 0) ? x : true` because ?: binds more loosely than &&; a header with the wrong
+ * magic or version was therefore reported as valid. The checks are now explicit.
+ */
+bool validate_enq_hdr(enq_hdr_t *eh, const uint32_t magic, const uint16_t version, const uint64_t rid) {
+    if (eh->_rhdr._magic != magic || eh->_rhdr._version != version) {
+        return false;
+    }
+    /* If rid == 0, don't compare rids */
+    return rid == 0 || eh->_rhdr._rid == rid;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h
new file mode 100644
index 0000000000..00108792bc
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/enq_hdr.h
@@ -0,0 +1,83 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdbool.h>
+#include "rec_hdr.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#pragma pack(1)
+
+/**
+ * \brief Struct for enqueue record.
+ *
+ * Struct for enqueue record. In addition to the common data, this header includes both the
+ * xid and data blob sizes.
+ *
+ * This header precedes all enqueue data in journal files.
+ *
+ * Record header info in binary format (40 bytes):
+ * <pre>
+ *   0                           7
+ * +---+---+---+---+---+---+---+---+  -+
+ * |     magic     |  ver  | flags |   |
+ * +---+---+---+---+---+---+---+---+   |
+ * |             serial            |   | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+   |
+ * |              rid              |   |
+ * +---+---+---+---+---+---+---+---+  -+
+ * |            xidsize            |
+ * +---+---+---+---+---+---+---+---+
+ * |             dsize             |
+ * +---+---+---+---+---+---+---+---+
+ * ver = file version (If the format or encoding of this file changes, then this
+ *       number should be incremented)
+ * </pre>
+ */
+typedef struct enq_hdr_t {
+    rec_hdr_t _rhdr;        /**< Common record header struct */
+    uint64_t _xidsize;      /**< XID size in octets */
+    uint64_t _dsize;        /**< Record data size in octets */
+} enq_hdr_t;
+
+/** Bit in rec_hdr_t::_uflag used by is_enq_transient() / set_enq_transient() */
+static const uint16_t ENQ_HDR_TRANSIENT_MASK = 0x10;
+/** Bit in rec_hdr_t::_uflag used by is_enq_external() / set_enq_external() */
+static const uint16_t ENQ_HDR_EXTERNAL_MASK = 0x20;
+
+/** Initialize all fields of an enqueue header, including the embedded rec_hdr_t */
+void enq_hdr_init(enq_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+                  const uint64_t serial, const uint64_t rid, const uint64_t xidsize, const uint64_t dsize);
+/** Copy all fields of src into dest */
+void enq_hdr_copy(enq_hdr_t* dest, const enq_hdr_t* src);
+/** Return true if the transient flag bit is set */
+bool is_enq_transient(const enq_hdr_t *eh);
+/** Set or clear the transient flag bit */
+void set_enq_transient(enq_hdr_t *eh, const bool transient);
+/** Return true if the external flag bit is set */
+bool is_enq_external(const enq_hdr_t *eh);
+/** Set or clear the external flag bit */
+void set_enq_external(enq_hdr_t *eh, const bool external);
+/**
+ * Check magic, version and - when rid is non-zero - the record id of an enqueue header.
+ * NOTE(review): confirm the implementation's && / ?: grouping actually enforces the magic and
+ * version checks in every case.
+ */
+bool validate_enq_hdr(enq_hdr_t *eh, const uint32_t magic, const uint16_t version, const uint64_t rid);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_ENQ_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c
new file mode 100644
index 0000000000..4e6cf1b8fa
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.c
@@ -0,0 +1,115 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
+ * See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "file_hdr.h"
+#include <string.h>
+
+/**
+ * Initialize the fixed (per-file) part of a file header. All per-use fields (flags, serial, rid,
+ * fro, timestamps, file number, queue name length) are zeroed.
+ */
+void file_hdr_create(file_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t fhdr_size_sblks,
+                     const uint16_t efp_partition, const uint64_t file_size) {
+    rec_hdr_init(&dest->_rhdr, magic, version, 0, 0, 0);
+    dest->_fhdr_size_sblks = fhdr_size_sblks;
+    dest->_efp_partition = efp_partition;
+    dest->_reserved = 0;
+    dest->_data_size_kib = file_size;
+    dest->_fro = 0;
+    dest->_ts_nsec = 0;
+    dest->_ts_sec = 0;
+    dest->_file_number = 0;
+    dest->_queue_name_len = 0;
+}
+
+/**
+ * Fill in the per-use fields of the file header located at dest, append the queue name directly
+ * after the struct, zero the remainder of the buffer and stamp the current time.
+ *
+ * \param dest           Buffer that starts with a file_hdr_t.
+ * \param dest_len       Size of the buffer at dest in octets.
+ * \param queue_name_len Number of octets of queue_name to store.
+ * \param queue_name     Queue name (not necessarily NUL-terminated).
+ * \return 0 on success, the non-zero result of set_time_now() on clock failure, or -1 if dest_len
+ *         cannot even hold a file_hdr_t.
+ *
+ * The stored name is truncated so that header + name fits into both MAX_FILE_HDR_LEN and dest_len.
+ * The earlier code computed that clamp but then overwrote _queue_name_len with the unclamped value
+ * and copied queue_name_len octets regardless, which could write past the buffer and make the
+ * memset size wrap around.
+ */
+int file_hdr_init(void* dest, const uint64_t dest_len, const uint16_t uflag, const uint64_t serial, const uint64_t rid,
+                  const uint64_t fro, const uint64_t file_number, const uint16_t queue_name_len, const char* queue_name) {
+    file_hdr_t* fhp = (file_hdr_t*)dest;
+    uint64_t limit = dest_len < MAX_FILE_HDR_LEN ? dest_len : MAX_FILE_HDR_LEN;
+    uint64_t name_len = queue_name_len;
+    if (dest_len < sizeof(file_hdr_t)) {
+        return -1; /* no room for the fixed header; writing anything would overrun dest */
+    }
+    if (name_len > limit - sizeof(file_hdr_t)) {
+        name_len = limit - sizeof(file_hdr_t);
+    }
+    fhp->_rhdr._uflag = uflag;
+    fhp->_rhdr._serial = serial;
+    fhp->_rhdr._rid = rid;
+    fhp->_fro = fro;
+    fhp->_file_number = file_number;
+    fhp->_queue_name_len = (uint16_t)name_len;
+    if (name_len > 0) {
+        memcpy((char*)dest + sizeof(file_hdr_t), queue_name, name_len);
+    }
+    memset((char*)dest + sizeof(file_hdr_t) + name_len, 0, dest_len - sizeof(file_hdr_t) - name_len);
+    return set_time_now(fhp);
+}
+
+/**
+ * Validate a file header. Returns 0 if ok, otherwise the bits from rec_hdr_check_base() OR-ed with
+ * 0x1000 (data size mismatch; skipped when data_size_kib is 0) and 0x10000 (queue name too long).
+ */
+int file_hdr_check(file_hdr_t* hdr, const uint32_t magic, const uint16_t version, const uint64_t data_size_kib, const uint16_t max_queue_name_len) {
+    int err = rec_hdr_check_base(&hdr->_rhdr, magic, version);
+    if (data_size_kib && hdr->_data_size_kib != data_size_kib) err |= 0x1000;
+    if (hdr->_queue_name_len > max_queue_name_len) err |= 0x10000;
+    return err;
+}
+
+/**
+ * Copy the header fields of src into dest.
+ * NOTE(review): _reserved and _queue_name_len are not copied - confirm this is deliberate (the
+ * queue name itself lives outside the struct and is not copied either).
+ */
+void file_hdr_copy(file_hdr_t* dest, const file_hdr_t* src) {
+    rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+    dest->_fhdr_size_sblks = src->_fhdr_size_sblks; // Should this be copied?
+    dest->_efp_partition = src->_efp_partition; // Should this be copied?
+    dest->_data_size_kib = src->_data_size_kib;
+    dest->_fro = src->_fro;
+    dest->_ts_sec = src->_ts_sec;
+    dest->_ts_nsec = src->_ts_nsec;
+    dest->_file_number = src->_file_number;
+}
+
+/** Zero the per-use fields of a file header; magic, version, sizes and partition are kept. */
+void file_hdr_reset(file_hdr_t* target) {
+    target->_rhdr._uflag = 0;
+    target->_rhdr._serial = 0;
+    target->_rhdr._rid = 0;
+    target->_fro = 0;
+    target->_ts_sec = 0;
+    target->_ts_nsec = 0;
+    target->_file_number = 0;
+    target->_queue_name_len = 0;
+}
+
+/**
+ * Return non-zero if the per-use fields are all zero.
+ * NOTE(review): _fro is cleared by file_hdr_reset() but is not tested here - confirm intent.
+ */
+int is_file_hdr_reset(file_hdr_t* target) {
+    return target->_rhdr._uflag == 0 &&
+           target->_rhdr._serial == 0 &&
+           target->_rhdr._rid == 0 &&
+           target->_ts_sec == 0 &&
+           target->_ts_nsec == 0 &&
+           target->_file_number == 0 &&
+           target->_queue_name_len == 0;
+}
+
+/** Stamp the header with CLOCK_REALTIME; returns 0 on success or the clock_gettime() error result. */
+int set_time_now(file_hdr_t *fh)
+{
+    struct timespec ts;
+    int err = clock_gettime(CLOCK_REALTIME, &ts);
+    if (err)
+        return err;
+    fh->_ts_sec = ts.tv_sec;
+    fh->_ts_nsec = ts.tv_nsec;
+    return 0;
+}
+
+
+/** Stamp the header with the caller-supplied time. */
+void set_time(file_hdr_t *fh, struct timespec *ts)
+{
+    fh->_ts_sec = ts->tv_sec;
+    fh->_ts_nsec = ts->tv_nsec;
+}
+
+
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h
new file mode 100644
index 0000000000..5987e1871e
--- /dev/null
+++
b/qpid/cpp/src/qpid/linearstore/journal/utils/file_hdr.h @@ -0,0 +1,111 @@ +#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H +#define QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include <time.h> +#include "rec_hdr.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#define MAX_FILE_HDR_LEN 4096 // Set to 1 sblk + +#pragma pack(1) + +/** + * \brief Struct for data common to the head of all journal files. In addition to + * the common data, this includes the record ID and offset of the first record in + * the file. + * + * This header precedes all data in journal files and occupies the first complete + * block in the file. The record ID and offset are updated on each overwrite of the + * file. 
+ *
+ * File header info in binary format (74 bytes + size of queue name in octets):
+ * <pre>
+ *   0                           7
+ * +---+---+---+---+---+---+---+---+  -+
+ * |     magic     |  ver  | flags |   |
+ * +---+---+---+---+---+---+---+---+   |
+ * |             serial            |   | struct rec_hdr_t
+ * +---+---+---+---+---+---+---+---+   |
+ * |              rid              |   |
+ * +---+---+---+---+---+---+---+---+  -+
+ * |  fhs  | partn |   reserved    |
+ * +---+---+---+---+---+---+---+---+
+ * |           data-size           |
+ * +---+---+---+---+---+---+---+---+
+ * |              fro              |
+ * +---+---+---+---+---+---+---+---+
+ * |        timestamp (sec)        |
+ * +---+---+---+---+---+---+---+---+
+ * |        timestamp (ns)         |
+ * +---+---+---+---+---+---+---+---+
+ * |          file-number          |
+ * +---+---+---+---+---+---+---+---+
+ * |  qnl  | Queue Name...         |
+ * +-------+                       |
+ * |                               |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ver = Journal version
+ * rid = Record ID
+ * fhs = File header size in sblks (defined by JRNL_SBLK_SIZE)
+ * partn = EFP partition from which this file came
+ * fro = First Record Offset
+ * qnl = Length of the queue name in octets.
+ * </pre>
+ */
+typedef struct file_hdr_t {
+    rec_hdr_t _rhdr;            /**< Common record header struct, but rid field is used for rid of first complete record in file */
+    uint16_t _fhdr_size_sblks;  /**< File header size in sblks (defined by JRNL_SBLK_SIZE) */
+    uint16_t _efp_partition;    /**< EFP Partition number from which this file was obtained */
+    uint32_t _reserved;         /**< Reserved; written as 0 by file_hdr_create() */
+    uint64_t _data_size_kib;    /**< Size of the data part of this file in KiB. (ie file size excluding file header sblk) */
+    uint64_t _fro;              /**< First Record Offset (FRO) */
+    uint64_t _ts_sec;           /**< Time stamp (seconds part) */
+    uint64_t _ts_nsec;          /**< Time stamp (nanoseconds part) */
+    uint64_t _file_number;      /**< The logical number of this file in a monotonically increasing sequence */
+    uint16_t _queue_name_len;   /**< Length of the queue name in octets, which follows this struct in the header */
+} file_hdr_t;
+
+/** Initialize the fixed part of a header (magic, version, sizes, partition); all other fields are zeroed */
+void file_hdr_create(file_hdr_t* dest, const uint32_t magic, const uint16_t version,
+                     const uint16_t fhdr_size_sblks, const uint16_t efp_partition, const uint64_t file_size);
+/**
+ * Fill in the per-use fields of the header at dest, store the queue name after the struct, zero the
+ * rest of the dest_len-octet buffer and set the time stamp. Returns 0 on success.
+ * NOTE(review): callers should ensure sizeof(file_hdr_t) + queue_name_len fits in dest_len and
+ * MAX_FILE_HDR_LEN - confirm how the implementation handles an over-long name.
+ */
+int file_hdr_init(void* dest, const uint64_t dest_len, const uint16_t uflag, const uint64_t serial, const uint64_t rid,
+                  const uint64_t fro, const uint64_t file_number, const uint16_t queue_name_len,
+                  const char* queue_name);
+/** Validate magic, version, data size (when non-zero) and queue name length; returns 0 or an error bit mask */
+int file_hdr_check(file_hdr_t* hdr, const uint32_t magic, const uint16_t version, const uint64_t data_size_kib,
+                   const uint16_t max_queue_name_len);
+/** Zero the per-use fields of the header */
+void file_hdr_reset(file_hdr_t* target);
+/** Return non-zero if the header is in the reset state */
+int is_file_hdr_reset(file_hdr_t* target);
+/** Copy header fields from src to dest */
+void file_hdr_copy(file_hdr_t* dest, const file_hdr_t* src);
+/** Set the time stamp from CLOCK_REALTIME; returns 0 on success */
+int set_time_now(file_hdr_t *fh);
+/** Set the time stamp from ts */
+void set_time(file_hdr_t *fh, struct timespec *ts);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_FILE_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c
new file mode 100644
index 0000000000..32eda8de5a
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
+ * The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "rec_hdr.h"
+
+/* Store the five common record-header fields into dest. */
+void rec_hdr_init(rec_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag, const uint64_t serial, const uint64_t rid) {
+    dest->_rid = rid;
+    dest->_serial = serial;
+    dest->_uflag = uflag;
+    dest->_version = version;
+    dest->_magic = magic;
+}
+
+/* Duplicate src into dest, field by field. */
+void rec_hdr_copy(rec_hdr_t* dest, const rec_hdr_t* src) {
+    rec_hdr_init(dest, src->_magic, src->_version, src->_uflag, src->_serial, src->_rid);
+}
+
+/* Compare magic and version; result is 0 or a combination of 0x1 (magic) and 0x10 (version). */
+int rec_hdr_check_base(rec_hdr_t* header, const uint32_t magic, const uint16_t version) {
+    const int magic_bit = (header->_magic != magic) ? 0x1 : 0;
+    const int version_bit = (header->_version != version) ? 0x10 : 0;
+    return magic_bit | version_bit;
+}
+
+/* As rec_hdr_check_base(), additionally flagging a serial mismatch with 0x100. */
+int rec_hdr_check(rec_hdr_t* header, const uint32_t magic, const uint16_t version, const uint64_t serial) {
+    const int base_bits = rec_hdr_check_base(header, magic, version);
+    return (header->_serial != serial) ? (base_bits | 0x100) : base_bits;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h
new file mode 100644
index 0000000000..64349b5ab8
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_hdr.h
@@ -0,0 +1,72 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C"{ +#endif + +#pragma pack(1) + +/** + * \brief Struct for data common to the head of all journal files and records. + * This includes identification for the file type, the encoding version, endian + * indicator and a record ID. 
+ *
+ * Record header info in binary format (24 bytes):
+ * <pre>
+ *   0                           7
+ * +---+---+---+---+---+---+---+---+
+ * |     magic     |  ver  | uflag |
+ * +---+---+---+---+---+---+---+---+
+ * |             serial            |
+ * +---+---+---+---+---+---+---+---+
+ * |              rid              |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ver = file version (If the format or encoding of this file changes, then this
+ *       number should be incremented)
+ * rid = Record ID
+ * </pre>
+ */
+typedef struct rec_hdr_t {
+    uint32_t _magic;        /**< File type identifier (magic number) */
+    uint16_t _version;      /**< File encoding version */
+    uint16_t _uflag;        /**< User-defined flags */
+    uint64_t _serial;       /**< Serial number for this journal file */
+    uint64_t _rid;          /**< Record ID (rotating 64-bit counter) */
+} rec_hdr_t;
+
+/** Set every field of dest from the supplied values */
+void rec_hdr_init(rec_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag, const uint64_t serial, const uint64_t rid);
+/** Copy every field of src into dest */
+void rec_hdr_copy(rec_hdr_t* dest, const rec_hdr_t* src);
+/** Compare magic and version; returns 0 if both match, otherwise an error bit mask */
+int rec_hdr_check_base(rec_hdr_t* header, const uint32_t magic, const uint16_t version);
+/** Compare magic, version and serial; returns 0 if all match, otherwise an error bit mask */
+int rec_hdr_check(rec_hdr_t* header, const uint32_t magic, const uint16_t version, const uint64_t serial);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_HDR_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c
new file mode 100644
index 0000000000..7128c96f32
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.c
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "rec_tail.h"
+
+/* Store the four tail fields into dest exactly as given. */
+void rec_tail_init(rec_tail_t* dest, const uint32_t xmagic, const uint32_t checksum, const uint64_t serial,
+                   const uint64_t rid) {
+    dest->_rid = rid;
+    dest->_serial = serial;
+    dest->_checksum = checksum;
+    dest->_xmagic = xmagic;
+}
+
+/* Build the tail matching header src: inverted magic, same serial and rid, plus the given checksum. */
+void rec_tail_copy(rec_tail_t* dest, const rec_hdr_t* src, const uint32_t checksum) {
+    const uint32_t inverted_magic = ~(src->_magic);
+    rec_tail_init(dest, inverted_magic, checksum, src->_serial, src->_rid);
+}
+
+/* Compare a tail with its header and expected checksum; returns 0 or a mask of REC_TAIL_*_ERR_MASK bits. */
+uint16_t rec_tail_check(const rec_tail_t* tail, const rec_hdr_t* header, const uint32_t checksum) {
+    const uint32_t expected_xmagic = ~header->_magic;
+    uint16_t mismatch = 0;
+    if (tail->_checksum != checksum) {
+        mismatch |= REC_TAIL_CHECKSUM_ERR_MASK;
+    }
+    if (tail->_rid != header->_rid) {
+        mismatch |= REC_TAIL_RID_ERR_MASK;
+    }
+    if (tail->_serial != header->_serial) {
+        mismatch |= REC_TAIL_SERIAL_ERR_MASK;
+    }
+    if (tail->_xmagic != expected_xmagic) {
+        mismatch |= REC_TAIL_MAGIC_ERR_MASK;
+    }
+    return mismatch;
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h
new file mode 100644
index 0000000000..afc71c104a
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/rec_tail.h
@@ -0,0 +1,82 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include <stdint.h> +#include "rec_hdr.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#pragma pack(1) + +/** + * \brief Struct for data common to the tail of all records. The magic number + * used here is the binary inverse (1's complement) of the magic used in the + * record header; this minimizes possible confusion with other headers that may + * be present during recovery. The tail is used with all records that have either + * XIDs or data - ie any size-variable content. Currently the only records that + * do NOT use the tail are non-transactional dequeues and filler records. + * + * The checksum is used to verify the xid and/or data portion of the record + * on recovery, and excludes the header and tail. 
+ *
+ * Record tail info in binary format (24 bytes):
+ * <pre>
+ *   0                           7
+ * +---+---+---+---+---+---+---+---+
+ * |   ~(magic)    |   checksum    |
+ * +---+---+---+---+---+---+---+---+
+ * |             serial            |
+ * +---+---+---+---+---+---+---+---+
+ * |              rid              |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * ~(magic) = 1's complement of magic of matching record header
+ * rid = Record ID of matching record header
+ * </pre>
+ *
+ * NOTE(review): txn_rec::check_rec_tail() feeds the whole txn_hdr_t into the checksum before the
+ * xid, i.e. the header is included there - confirm which definition of the checksum is intended.
+ */
+typedef struct rec_tail_t {
+    uint32_t _xmagic;       /**< Binary inverse (1's complement) of hdr magic number */
+    uint32_t _checksum;     /**< Checksum of xid and data (excluding header itself) */
+    uint64_t _serial;       /**< Serial number for this journal file */
+    uint64_t _rid;          /**< Record ID (rotating 64-bit counter) */
+} rec_tail_t;
+
+/* Bits returned by rec_tail_check(), one per mismatching tail field */
+static const uint16_t REC_TAIL_MAGIC_ERR_MASK = 0x01;
+static const uint16_t REC_TAIL_SERIAL_ERR_MASK = 0x02;
+static const uint16_t REC_TAIL_RID_ERR_MASK = 0x04;
+static const uint16_t REC_TAIL_CHECKSUM_ERR_MASK = 0x08;
+
+/** Set every field of dest from the supplied values (xmagic is stored as given, not inverted) */
+void rec_tail_init(rec_tail_t* dest, const uint32_t xmagic, const uint32_t checksum, const uint64_t serial,
+                   const uint64_t rid);
+/** Build the tail for header src: ~magic, serial and rid from src, plus the given checksum */
+void rec_tail_copy(rec_tail_t* dest, const rec_hdr_t* src, const uint32_t checksum);
+/** Compare tail against header and checksum; returns 0 or a mask of REC_TAIL_*_ERR_MASK bits */
+uint16_t rec_tail_check(const rec_tail_t* tail, const rec_hdr_t* header, const uint32_t checksum);
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_REC_TAIL_H */
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c
new file mode 100644
index 0000000000..58d4cdebe4
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
+ * The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "txn_hdr.h"
+
+/* Fill in a transaction header: the xid size plus the embedded common record header. */
+void txn_hdr_init(txn_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag,
+                  const uint64_t serial, const uint64_t rid, const uint64_t xidsize) {
+    dest->_xidsize = xidsize;
+    rec_hdr_init(&dest->_rhdr, magic, version, uflag, serial, rid);
+}
+
+/* Field-by-field copy of a transaction header from src to dest. */
+void txn_hdr_copy(txn_hdr_t* dest, const txn_hdr_t* src) {
+    dest->_xidsize = src->_xidsize;
+    rec_hdr_copy(&dest->_rhdr, &src->_rhdr);
+}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h
new file mode 100644
index 0000000000..442a1d373d
--- /dev/null
+++ b/qpid/cpp/src/qpid/linearstore/journal/utils/txn_hdr.h
@@ -0,0 +1,72 @@
+#ifndef QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H
+#define QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "rec_hdr.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#pragma pack(1) + +/** + * \brief Struct for transaction commit and abort records. + * + * Struct for local and DTX commit and abort records. Only the magic distinguishes between them. + * Since this record must be used in the context of a valid XID, the xidsize field must not be + * zero. Immediately following this record is the XID itself which is xidsize bytes long, + * followed by a rec_tail. + * + * Note that this record had its own rid distinct from the rids of the record(s) making up the + * transaction it is committing or aborting. 
+ * + * Record header info in binary format (32 bytes): + * <pre> + * 0 7 + * +---+---+---+---+---+---+---+---+ -+ + * | magic | ver | flags | | + * +---+---+---+---+---+---+---+---+ | + * | serial | | struct rec_hdr_t + * +---+---+---+---+---+---+---+---+ | + * | rid | | + * +---+---+---+---+---+---+---+---+ -+ + * | xidsize | + * +---+---+---+---+---+---+---+---+ + * </pre> + */ +typedef struct txn_hdr_t { + rec_hdr_t _rhdr; /**< Common record header struct */ + uint64_t _xidsize; /**< XID size */ +} txn_hdr_t; + +void txn_hdr_init(txn_hdr_t* dest, const uint32_t magic, const uint16_t version, const uint16_t uflag, + const uint64_t serial, const uint64_t rid, const uint64_t xidsize); +void txn_hdr_copy(txn_hdr_t* dest, const txn_hdr_t* src); + +#pragma pack() + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef QPID_LINEARSTORE_JOURNAL_UTILS_TXN_HDR_H */ diff --git a/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp b/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp new file mode 100644 index 0000000000..1ff18da663 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/wmgr.cpp @@ -0,0 +1,1086 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ + +#include "qpid/linearstore/journal/wmgr.h" + +#include <cassert> +#include "qpid/linearstore/journal/aio_callback.h" +#include "qpid/linearstore/journal/Checksum.h" +#include "qpid/linearstore/journal/data_tok.h" +#include "qpid/linearstore/journal/jcntl.h" +#include "qpid/linearstore/journal/JournalFile.h" +#include "qpid/linearstore/journal/LinearFileController.h" +#include "qpid/linearstore/journal/utils/file_hdr.h" + +namespace qpid { +namespace linearstore { +namespace journal { + +wmgr::wmgr(jcntl* jc, + enq_map& emap, + txn_map& tmap, + LinearFileController& lfc): + pmgr(jc, emap, tmap), + _lfc(lfc), + _max_dtokpp(0), + _max_io_wait_us(0), + _cached_offset_dblks(0), + _enq_busy(false), + _deq_busy(false), + _abort_busy(false), + _commit_busy(false), + _txn_pending_map() +{} + +wmgr::wmgr(jcntl* jc, + enq_map& emap, + txn_map& tmap, + LinearFileController& lfc, + const uint32_t max_dtokpp, + const uint32_t max_iowait_us): + pmgr(jc, emap, tmap), + _lfc(lfc), + _max_dtokpp(max_dtokpp), + _max_io_wait_us(max_iowait_us), + _cached_offset_dblks(0), + _enq_busy(false), + _deq_busy(false), + _abort_busy(false), + _commit_busy(false), + _txn_pending_map() +{} + +wmgr::~wmgr() +{ + wmgr::clean(); +} + +void +wmgr::initialize(aio_callback* const cbp, + const uint32_t wcache_pgsize_sblks, + const uint16_t wcache_num_pages, + const uint32_t max_dtokpp, + const uint32_t max_iowait_us, + std::size_t end_offset) +{ + _enq_busy = false; + _deq_busy = false; + _abort_busy = false; + _commit_busy = false; + _max_dtokpp = max_dtokpp; + _max_io_wait_us = max_iowait_us; + + initialize(cbp, wcache_pgsize_sblks, wcache_num_pages); + + if (end_offset) + { + if(!aio::is_aligned((const void*)end_offset, QLS_AIO_ALIGN_BOUNDARY_BYTES)) { + std::ostringstream oss; + oss << "Recovery using misaligned end_offset (0x" << std::hex << end_offset << std::dec << ")" << std::endl; + throw jexception(jerrno::JERR_WMGR_NOTSBLKALIGNED, oss.str(), "wmgr", "initialize"); + } + const 
uint32_t wr_pg_size_dblks = _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS; + uint32_t data_dblks = (end_offset / QLS_DBLK_SIZE_BYTES) - (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS); // exclude file header + _pg_cntr = data_dblks / wr_pg_size_dblks; // Must be set to get file rotation synchronized (this is determined by value of _pg_cntr) + _pg_offset_dblks = data_dblks - (_pg_cntr * wr_pg_size_dblks); + } +} + +iores +wmgr::enqueue(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len, + const bool tpc_flag, + const bool transient, + const bool external) +{ +//std::cout << _lfc.status(10) << std::endl; + if (xid_len) + assert(xid_ptr != 0); + + if (_deq_busy || _abort_busy || _commit_busy) { + std::ostringstream oss; + oss << "RHM_IORES_BUSY: enqueue while part way through another op:"; + oss << " _deq_busy=" << (_deq_busy?"T":"F"); + oss << " _abort_busy=" << (_abort_busy?"T":"F"); + oss << " _commit_busy=" << (_commit_busy?"T":"F"); + throw jexception(oss.str()); // TODO: complete exception + } + + if (this_data_len != tot_data_len && !external) { + throw jexception("RHM_IORES_NOTIMPL: partial enqueues not implemented"); // TODO: complete exception; + } + + iores res = pre_write_check(WMGR_ENQUEUE, dtokp, xid_len, tot_data_len, external); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_enq_busy) // If enqueue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::ENQ_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_ENQDISCONT, oss.str(), "wmgr", "enqueue"); + } + } + + uint64_t rid = (dtokp->external_rid() | cont) ? 
dtokp->rid() : _lfc.getNextRecordId(); + _enq_rec.reset(_lfc.getCurrentSerial(), rid, data_buff, tot_data_len, xid_ptr, xid_len, transient, external); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + if (xid_len) + dtokp->set_xid(xid_ptr, xid_len); + else + dtokp->clear_xid(); + _enq_busy = true; + } +//std::cout << "---+++ wmgr::enqueue() ENQ rid=0x" << std::hex << rid << " po=0x" << _pg_offset_dblks << " cs=0x" << (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) << " " << std::dec << std::flush; // DEBUG + bool done = false; + Checksum checksum; + while (!done) + { +//std::cout << "*" << std::flush; // DEBUG + assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES); + uint32_t data_offs_dblks = dtokp->dblocks_written(); + uint32_t ret = _enq_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) { + dtokp->set_fid(_lfc.getCurrentFileSeqNum()); + } + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _enq_rec.rec_size_dblks()) + { +//std::cout << "!" << std::flush; // DEBUG + // TODO: Incorrect - must set state to ENQ_CACHED; ENQ_SUBM is set when AIO returns. + dtokp->set_wstate(data_tok::ENQ_SUBM); + dtokp->set_dsize(tot_data_len); + // Only add this data token to page token list when submit is complete, this way + // long multi-page messages have their token on the page containing the END of the + // message. AIO callbacks will then only process this token when entire message is + // enqueued. 
+ _lfc.incrEnqueuedRecordCount(dtokp->fid()); +//std::cout << "[0x" << std::hex << _lfc.getEnqueuedRecordCount(dtokp->fid()) << std::dec << std::flush; // DEBUG + + if (xid_len) // If part of transaction, add to transaction map + { + std::string xid((const char*)xid_ptr, xid_len); + _tmap.insert_txn_data(xid, txn_data_t(rid, 0, dtokp->fid(), 0, true, tpc_flag, false)); + } + else + { + if (_emap.insert_pfid(rid, dtokp->fid(), 0) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid << " _pfid=0x" << dtokp->fid(); + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "enqueue"); + } + } + + done = true; + } else { +//std::cout << "$" << std::flush; // DEBUG + dtokp->set_wstate(data_tok::ENQ_PART); + } + + file_header_check(rid, cont, _enq_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done, rid); + } + if (dtokp->wstate() >= data_tok::ENQ_SUBM) + _enq_busy = false; +//std::cout << " res=" << iores_str(res) << " _enq_busy=" << (_enq_busy?"T":"F") << std::endl << std::flush; // DEBUG + return res; +} + +iores +wmgr::dequeue(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len, + const bool tpc_flag, + const bool txn_coml_commit) +{ + if (xid_len) + assert(xid_ptr != 0); + + if (_enq_busy || _abort_busy || _commit_busy) { + std::ostringstream oss; + oss << "RHM_IORES_BUSY: dequeue while part way through another op:"; + oss << " _enq_busy=" << (_enq_busy?"T":"F"); + oss << " _abort_busy=" << (_abort_busy?"T":"F"); + oss << " _commit_busy=" << (_commit_busy?"T":"F"); + throw jexception(oss.str()); // TODO: complete exception + } + + iores res = pre_write_check(WMGR_DEQUEUE, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_deq_busy) // If dequeue() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::DEQ_PART) + cont = 
true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "dequeue"); + } + } + + const bool ext_rid = dtokp->external_rid(); + uint64_t rid = (ext_rid | cont) ? dtokp->rid() : _lfc.getNextRecordId(); + uint64_t dequeue_rid = (ext_rid | cont) ? dtokp->dequeue_rid() : dtokp->rid(); + _deq_rec.reset(_lfc.getCurrentSerial(), rid, dequeue_rid, xid_ptr, xid_len, txn_coml_commit); + if (!cont) + { + if (!ext_rid) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(dequeue_rid); + } + if (xid_len) + dtokp->set_xid(xid_ptr, xid_len); + else + dtokp->clear_xid(); + dequeue_check(dtokp->xid(), dequeue_rid); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _deq_busy = true; + } +//std::cout << "---+++ wmgr::dequeue() DEQ rid=0x" << std::hex << rid << " drid=0x" << dequeue_rid << " " << std::dec << std::flush; // DEBUG + std::string xid((const char*)xid_ptr, xid_len); + bool done = false; + Checksum checksum; + while (!done) + { +//std::cout << "*" << std::flush; // DEBUG + assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES); + uint32_t data_offs_dblks = dtokp->dblocks_written(); + uint32_t ret = _deq_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum); + + if (data_offs_dblks == 0) { + uint64_t fid; + short eres = _emap.get_pfid(dtokp->dequeue_rid(), fid); + if (eres == enq_map::EMAP_OK) { + dtokp->set_fid(fid); + } else if (xid_len > 0) { + txn_data_list_t tdl = _tmap.get_tdata_list(xid); + bool found = false; + for (tdl_const_itr_t i=tdl.begin(); i!=tdl.end() && !found; ++i) { + if (i->rid_ == dtokp->dequeue_rid()) { + found = true; + dtokp->set_fid(i->fid_); + break; + } + } + if (!found) { + throw jexception("rid found in neither emap nor tmap, 
transactional"); + } + } else { + throw jexception("rid not found in emap, non-transactional"); + } + } + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _deq_rec.rec_size_dblks()) + { +//std::cout << "!" << std::flush; // DEBUG + // TODO: Incorrect - must set state to ENQ_CACHED; ENQ_SUBM is set when AIO returns. + dtokp->set_wstate(data_tok::DEQ_SUBM); + + if (xid_len) // If part of transaction, add to transaction map + { + // If the enqueue is part of a pending txn, it will not yet be in emap + _emap.lock(dequeue_rid); // ignore rid not found error + std::string xid((const char*)xid_ptr, xid_len); + _tmap.insert_txn_data(xid, txn_data_t(rid, dequeue_rid, dtokp->fid(), 0, false, tpc_flag, false)); + } + else + { + uint64_t fid; + short eres = _emap.get_remove_pfid(dtokp->dequeue_rid(), fid); + if (eres < enq_map::EMAP_OK) // fail + { + if (eres == enq_map::EMAP_RID_NOT_FOUND) + { + std::ostringstream oss; + oss << std::hex << "emap: rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "dequeue"); + } + if (eres == enq_map::EMAP_LOCKED) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << rid; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue"); + } + } + } + + done = true; + } else { +//std::cout << "$" << std::flush; // DEBUG + dtokp->set_wstate(data_tok::DEQ_PART); + } + + file_header_check(rid, cont, _deq_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done, rid); + } + if (dtokp->wstate() >= data_tok::DEQ_SUBM) + _deq_busy = false; +//std::cout << " res=" << iores_str(res) << " _deq_busy=" << (_deq_busy?"T":"F") << std::endl << std::flush; // DEBUG + return res; +} + +iores +wmgr::abort(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len) +{ + // commit and 
abort MUST have a valid xid + assert(xid_ptr != 0 && xid_len > 0); + + if (_enq_busy || _deq_busy || _commit_busy) { + std::ostringstream oss; + oss << "RHM_IORES_BUSY: abort while part way through another op:"; + oss << " _enq_busy=" << (_enq_busy?"T":"F"); + oss << " _deq_busy=" << (_deq_busy?"T":"F"); + oss << " _commit_busy=" << (_commit_busy?"T":"F"); + throw jexception(oss.str()); // TODO: complete exception + } + + iores res = pre_write_check(WMGR_ABORT, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_abort_busy) // If abort() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if (dtokp->wstate() == data_tok::ABORT_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "abort"); + } + } + + uint64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _lfc.getNextRecordId(); + _txn_rec.reset(false, _lfc.getCurrentSerial(), rid, xid_ptr, xid_len); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + dtokp->set_xid(xid_ptr, xid_len); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _abort_busy = true; + } + bool done = false; + Checksum checksum; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES); + uint32_t data_offs_dblks = dtokp->dblocks_written(); + uint32_t ret = _txn_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_lfc.getCurrentFileSeqNum()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + 
_page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? + if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks()) + { + dtokp->set_wstate(data_tok::ABORT_SUBM); + + // Delete this txn from tmap, unlock any locked records in emap + std::string xid((const char*)xid_ptr, xid_len); + txn_data_list_t tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + fidl_t fidl; + for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (!itr->enq_flag_) + _emap.unlock(itr->drid_); // ignore rid not found error + if (itr->enq_flag_) { + fidl.push_back(itr->fid_); + } + } + std::pair<pending_txn_map_itr_t, bool> res = _txn_pending_map.insert(std::pair<std::string, fidl_t>(xid, fidl)); + if (!res.second) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\""; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "abort"); + } + + done = true; + } else { + dtokp->set_wstate(data_tok::ABORT_PART); + } + + file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done, rid); + } + if (dtokp->wstate() >= data_tok::ABORT_SUBM) + _abort_busy = false; + return res; +} + +iores +wmgr::commit(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len) +{ + // commit and abort MUST have a valid xid + assert(xid_ptr != 0 && xid_len > 0); + + if (_enq_busy || _deq_busy || _abort_busy) { + std::ostringstream oss; + oss << "RHM_IORES_BUSY: commit while part way through another op:"; + oss << " _enq_busy=" << (_enq_busy?"T":"F"); + oss << " _deq_busy=" << (_deq_busy?"T":"F"); + oss << " _abort_busy=" << (_abort_busy?"T":"F"); + throw jexception(oss.str()); // TODO: complete exception + } + + iores res = pre_write_check(WMGR_COMMIT, dtokp); + if (res != RHM_IORES_SUCCESS) + return res; + + bool cont = false; + if (_commit_busy) // If commit() exited last time with RHM_IORES_FULL or RHM_IORES_PAGE_AIOWAIT + { + if 
(dtokp->wstate() == data_tok::COMMIT_PART) + cont = true; + else + { + std::ostringstream oss; + oss << "This data_tok: id=" << dtokp->id() << " state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_DEQDISCONT, oss.str(), "wmgr", "commit"); + } + } + + uint64_t rid = (dtokp->external_rid() | cont) ? dtokp->rid() : _lfc.getNextRecordId(); + _txn_rec.reset(true, _lfc.getCurrentSerial(), rid, xid_ptr, xid_len); + if (!cont) + { + dtokp->set_rid(rid); + dtokp->set_dequeue_rid(0); + dtokp->set_xid(xid_ptr, xid_len); + dtokp->set_dblocks_written(0); // Reset dblks_written from previous op + _commit_busy = true; + } + bool done = false; + Checksum checksum; + while (!done) + { + assert(_pg_offset_dblks < _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS); + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES); + uint32_t data_offs_dblks = dtokp->dblocks_written(); + uint32_t ret = _txn_rec.encode(wptr, data_offs_dblks, + (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) - _pg_offset_dblks, checksum); + + // Remember fid which contains the record header in case record is split over several files + if (data_offs_dblks == 0) + dtokp->set_fid(_lfc.getCurrentFileSeqNum()); + _pg_offset_dblks += ret; + _cached_offset_dblks += ret; + dtokp->incr_dblocks_written(ret); + dtokp->incr_pg_cnt(); + _page_cb_arr[_pg_index]._pdtokl->push_back(dtokp); + + // Is the encoding of this record complete? 
+ if (dtokp->dblocks_written() >= _txn_rec.rec_size_dblks()) + { + dtokp->set_wstate(data_tok::COMMIT_SUBM); + + // Delete this txn from tmap, process records into emap + std::string xid((const char*)xid_ptr, xid_len); + txn_data_list_t tdl = _tmap.get_remove_tdata_list(xid); // tdl will be empty if xid not found + fidl_t fidl; + for (tdl_itr_t itr = tdl.begin(); itr != tdl.end(); itr++) + { + if (itr->enq_flag_) // txn enqueue + { + if (_emap.insert_pfid(itr->rid_, itr->fid_, 0) < enq_map::EMAP_OK) // fail + { + // The only error code emap::insert_pfid() returns is enq_map::EMAP_DUP_RID. + std::ostringstream oss; + oss << std::hex << "rid=0x" << itr->rid_ << " _pfid=0x" << itr->fid_; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit"); + } + } + else // txn dequeue + { + uint64_t fid; + short eres = _emap.get_remove_pfid(itr->drid_, fid, true); + if (eres < enq_map::EMAP_OK) // fail + { + if (eres == enq_map::EMAP_RID_NOT_FOUND) + { + std::ostringstream oss; + oss << std::hex << "emap: rid=0x" << itr->drid_; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "commit"); + } + if (eres == enq_map::EMAP_LOCKED) + { + std::ostringstream oss; + oss << std::hex << "rid=0x" << itr->drid_; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "commit"); + } + } + fidl.push_back(fid); + } + } + std::pair<pending_txn_map_itr_t, bool> res = _txn_pending_map.insert(std::pair<std::string, fidl_t>(xid, fidl)); + if (!res.second) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: xid=\"" << xid << "\""; + throw jexception(jerrno::JERR_MAP_DUPLICATE, oss.str(), "wmgr", "commit"); + } + + done = true; + } else { + dtokp->set_wstate(data_tok::COMMIT_PART); + } + + file_header_check(rid, cont, _txn_rec.rec_size_dblks() - data_offs_dblks); + flush_check(res, cont, done, rid); + } + if (dtokp->wstate() >= data_tok::COMMIT_SUBM) + _commit_busy = false; + return res; +} + +void +wmgr::file_header_check(const uint64_t rid, 
+ const bool cont, + const uint32_t rec_dblks_rem) +{ + if (_lfc.isEmpty()) // File never written (i.e. no header or data) + { +//std::cout << "e" << std::flush; + std::size_t fro = 0; + if (cont) { + bool file_fit = rec_dblks_rem <= _lfc.dataSize_sblks() * QLS_SBLK_SIZE_DBLKS; // Will fit within this journal file + bool file_full = rec_dblks_rem == _lfc.dataSize_sblks() * QLS_SBLK_SIZE_DBLKS; // Will exactly fill this journal file + if (file_fit && !file_full) { + fro = (rec_dblks_rem + (QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS)) * QLS_DBLK_SIZE_BYTES; + } + } else { + fro = QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_BYTES; + } + _lfc.asyncFileHeaderWrite(_ioctx, 0, rid, fro); + _aio_evt_rem++; + } +} + +void +wmgr::flush_check(iores& res, + bool& cont, + bool& done, const uint64_t /*rid*/) // DEBUG +{ + // Is page is full, flush + if (_pg_offset_dblks >= _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) + { +//std::cout << "^" << _pg_offset_dblks << ">=" << (_cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) << std::flush; + res = write_flush(); + assert(res == RHM_IORES_SUCCESS); + + if (_page_cb_arr[_pg_index]._state == AIO_PENDING && !done) + { + res = RHM_IORES_PAGE_AIOWAIT; + done = true; + } + + // If file is full, rotate to next file + uint32_t dataSize_pgs = _lfc.dataSize_sblks() / _cache_pgsize_sblks; + if (_pg_cntr >= dataSize_pgs) + { +//std::cout << _pg_cntr << ">=" << fileSize_pgs << std::flush; + get_next_file(); + if (!done) { + cont = true; + } +//std::cout << "***** wmgr::flush_check(): GET NEXT FILE: rid=0x" << std::hex << rid << std::dec << " res=" << iores_str(res) << " cont=" << (cont?"T":"F") << " done=" << (done?"T":"F") << std::endl; // DEBUG + } + } +} + +iores +wmgr::flush() +{ + iores res = write_flush(); + uint32_t dataSize_pgs = _lfc.dataSize_sblks() / _cache_pgsize_sblks; + if (res == RHM_IORES_SUCCESS && _pg_cntr >= dataSize_pgs) { + get_next_file(); + } + return res; +} + +iores +wmgr::write_flush() +{ + iores res = RHM_IORES_SUCCESS; 
+ // Don't bother flushing an empty page or one that is still in state AIO_PENDING + if (_cached_offset_dblks) + { + if (_page_cb_arr[_pg_index]._state == AIO_PENDING) { +//std::cout << "#" << std::flush; // DEBUG + res = RHM_IORES_PAGE_AIOWAIT; + } else { + if (_page_cb_arr[_pg_index]._state != IN_USE) + { + std::ostringstream oss; + oss << "pg_index=" << _pg_index << " state=" << _page_cb_arr[_pg_index].state_str(); + throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", "write_flush"); + } + + // Send current page using AIO + + // In manual flushes, dblks may not coincide with sblks, add filler records ("RHMx") if necessary. + dblk_roundup(); + + std::size_t pg_offs = (_pg_offset_dblks - _cached_offset_dblks) * QLS_DBLK_SIZE_BYTES; + aio_cb* aiocbp = &_aio_cb_arr[_pg_index]; + _lfc.asyncPageWrite(_ioctx, aiocbp, (char*)_page_ptr_arr[_pg_index] + pg_offs, _cached_offset_dblks); + _page_cb_arr[_pg_index]._state = AIO_PENDING; + _aio_evt_rem++; +//std::cout << "." << _aio_evt_rem << std::flush; // DEBUG + _cached_offset_dblks = 0; + _jc->instr_incr_outstanding_aio_cnt(); + + rotate_page(); // increments _pg_index, resets _pg_offset_dblks if req'd + if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + } + } + get_events(0, false); + if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + return res; +} + +void +wmgr::get_next_file() +{ + _pg_cntr = 0; +//std::cout << "&&&&& wmgr::get_next_file(): " << status_str() << std::flush << std::endl; // DEBUG + _lfc.getNextJournalFile(); +} + +int32_t +wmgr::get_events(timespec* const timeout, + bool flush) +{ + if (_aio_evt_rem == 0) // no events to get + return 0; + + int ret = 0; + if ((ret = aio::getevents(_ioctx, flush ? 
_aio_evt_rem : 1, _aio_evt_rem, _aio_event_arr, timeout)) < 0) + { + if (ret == -EINTR) // Interrupted by signal + return 0; + std::ostringstream oss; + oss << "io_getevents() failed: " << std::strerror(-ret) << " (" << ret << ") ctx_id=" << _ioctx; + oss << " min_nr=" << (flush ? _aio_evt_rem : 1) << " nr=" << _aio_evt_rem; + throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events"); + } + + if (ret == 0 && timeout) + return jerrno::AIO_TIMEOUT; + + int32_t tot_data_toks = 0; + for (int i=0; i<ret; i++) // Index of returned AIOs + { + if (_aio_evt_rem == 0) + { + std::ostringstream oss; + oss << "_aio_evt_rem; evt " << (i + 1) << " of " << ret; + throw jexception(jerrno::JERR__UNDERFLOW, oss.str(), "wmgr", "get_events"); + } + _aio_evt_rem--; +//std::cout << "'" << _aio_evt_rem; // DEBUG + aio_cb* aiocbp = _aio_event_arr[i].obj; // This I/O control block (iocb) + page_cb* pcbp = (page_cb*)(aiocbp->data); // This page control block (pcb) + long aioret = (long)_aio_event_arr[i].res; + if (aioret < 0) { + std::ostringstream oss; + oss << "AIO write operation failed: " << std::strerror(-aioret) << " (" << aioret << ")" << std::endl; + oss << " data=" << _aio_event_arr[i].data << std::endl; + oss << " obj=" << _aio_event_arr[i].obj << std::endl; + oss << " res=" << _aio_event_arr[i].res << std::endl; + oss << " res2=" << _aio_event_arr[i].res2 << std::endl; + oss << " iocb->data=" << aiocbp->data << std::endl; + oss << " iocb->key=" << aiocbp->key << std::endl; + oss << " iocb->aio_lio_opcode=" << aiocbp->aio_lio_opcode << std::endl; + oss << " iocb->aio_reqprio=" << aiocbp->aio_reqprio << std::endl; + oss << " iocb->aio_fildes=" << aiocbp->aio_fildes << std::endl; + oss << " iocb->u.c.buf=" << aiocbp->u.c.buf << std::endl; + oss << " iocb->u.c.nbytes=0x" << std::hex << aiocbp->u.c.nbytes << std::dec << " (" << aiocbp->u.c.nbytes << ")" << std::endl; + oss << " iocb->u.c.offset=0x" << std::hex << aiocbp->u.c.offset << std::dec << " (" << aiocbp->u.c.offset 
<< ")" << std::endl; + oss << " iocb->u.c.flags=0x" << std::hex << aiocbp->u.c.flags << std::dec << " (" << aiocbp->u.c.flags << ")" << std::endl; + oss << " iocb->u.c.resfd=" << aiocbp->u.c.resfd << std::endl; + if (pcbp) { + oss << " Page Control Block: (iocb->data):" << std::endl; + oss << " pcb.index=" << pcbp->_index << std::endl; + oss << " pcb.state=" << pcbp->_state << " (" << pmgr::page_state_str(pcbp->_state) << ")" << std::endl; + oss << " pcb.frid=0x" << std::hex << pcbp->_frid << std::dec << std::endl; + oss << " pcb.wdblks=0x" << std::hex << pcbp->_wdblks << std::dec << std::endl; + oss << " pcb.pdtokl.size=" << pcbp->_pdtokl->size() << std::endl; + oss << " pcb.pbuff=" << pcbp->_pbuff << std::endl; + oss << " JournalFile (pcb.jfp):" << std::endl; + oss << pcbp->_jfp->status_str(6) << std::endl; + } else { + file_hdr_t* fhp = (file_hdr_t*)aiocbp->u.c.buf; + oss << "fnum=" << fhp->_file_number; + oss << " qname=" << std::string((char*)fhp + sizeof(file_hdr_t), fhp->_queue_name_len); + } + throw jexception(jerrno::JERR__AIO, oss.str(), "wmgr", "get_events"); + } + if (pcbp) // Page writes have pcb + { +//std::cout << "p"; // DEBUG + uint32_t s = pcbp->_pdtokl->size(); + std::vector<data_tok*> dtokl; + dtokl.reserve(s); + for (uint32_t k=0; k<s; k++) + { + data_tok* dtokp = pcbp->_pdtokl->at(k); + if (dtokp->decr_pg_cnt() == 0) + { + pending_txn_map_itr_t it; + switch (dtokp->wstate()) + { + case data_tok::ENQ_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::ENQ); + if (dtokp->has_xid()) + // Ignoring return value here. A non-zero return can signify that the transaction + // has committed or aborted, and which was completed prior to the aio returning. 
+ _tmap.set_aio_compl(dtokp->xid(), dtokp->rid()); + break; + case data_tok::DEQ_SUBM: + if (!dtokp->has_xid()) { + _lfc.decrEnqueuedRecordCount(dtokp->fid()); + } + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::DEQ); + if (dtokp->has_xid()) + // Ignoring return value - see note above. + _tmap.set_aio_compl(dtokp->xid(), dtokp->rid()); + break; + case data_tok::ABORT_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::ABORTED); + it = _txn_pending_map.find(dtokp->xid()); + if (it == _txn_pending_map.end()) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: abort xid=\"" + << qpid::linearstore::journal::jcntl::str2hexnum(dtokp->xid()) << "\""; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "get_events"); + } + for (fidl_itr_t i=it->second.begin(); i!=it->second.end(); ++i) { + _lfc.decrEnqueuedRecordCount(*i); + } + _txn_pending_map.erase(it); + break; + case data_tok::COMMIT_SUBM: + dtokl.push_back(dtokp); + tot_data_toks++; + dtokp->set_wstate(data_tok::COMMITTED); + it = _txn_pending_map.find(dtokp->xid()); + if (it == _txn_pending_map.end()) + { + std::ostringstream oss; + oss << std::hex << "_txn_pending_set: commit xid=\"" + << qpid::linearstore::journal::jcntl::str2hexnum(dtokp->xid()) << "\""; + throw jexception(jerrno::JERR_MAP_NOTFOUND, oss.str(), "wmgr", "get_events"); + } + for (fidl_itr_t i=it->second.begin(); i!=it->second.end(); ++i) { + _lfc.decrEnqueuedRecordCount(*i); + } + _txn_pending_map.erase(it); + break; + case data_tok::ENQ_PART: + case data_tok::DEQ_PART: + case data_tok::ABORT_PART: + case data_tok::COMMIT_PART: + // ignore these + break; + default: + // throw for anything else + std::ostringstream oss; + oss << "dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "get_events"); + } + } + } + + // Increment the completed write offset + // NOTE: We cannot use 
_wrfc here, as it may have rotated since submitting count. + // Use stored pointer to fcntl in the pcb instead. + pcbp->_jfp->addCompletedDblkCount(pcbp->_wdblks); + pcbp->_jfp->decrOutstandingAioOperationCount(); + _jc->instr_decr_outstanding_aio_cnt(); + + // Clean up this pcb's data_tok list + pcbp->_pdtokl->clear(); + pcbp->_state = UNUSED; +//std::cout << "c" << pcbp->_index << pcbp->state_str(); // DEBUG + + // Perform AIO return callback + if (_cbp && tot_data_toks) + _cbp->wr_aio_cb(dtokl); + } + else // File header writes have no pcb + { +//std::cout << "f"; // DEBUG + file_hdr_t* fhp = (file_hdr_t*)aiocbp->u.c.buf; + _lfc.addWriteCompletedDblkCount(fhp->_file_number, QLS_JRNL_FHDR_RES_SIZE_SBLKS * QLS_SBLK_SIZE_DBLKS); + _lfc.decrOutstandingAioOperationCount(fhp->_file_number); + } + } + + return tot_data_toks; +} + +bool +wmgr::is_txn_synced(const std::string& xid) +{ + // Ignore xid not found error here + if (_tmap.is_txn_synced(xid) == txn_map::TMAP_NOT_SYNCED) + return false; + // Check for outstanding commit/aborts + pending_txn_map_itr_t it = _txn_pending_map.find(xid); + return it == _txn_pending_map.end(); +} + +void +wmgr::initialize(aio_callback* const cbp, + const uint32_t wcache_pgsize_sblks, + const uint16_t wcache_num_pages) +{ + + pmgr::initialize(cbp, wcache_pgsize_sblks, wcache_num_pages); + wmgr::clean(); + _page_cb_arr[0]._state = IN_USE; + _cached_offset_dblks = 0; + _enq_busy = false; +} + +iores +wmgr::pre_write_check(const _op_type op, + const data_tok* const dtokp, + const std::size_t /*xidsize*/, + const std::size_t /*dsize*/, + const bool /*external*/) const +{ + // Check status of current file + // TODO: Replace for LFC +/* + if (!_wrfc.is_wr_reset()) + { + if (!_wrfc.wr_reset()) + return RHM_IORES_FULL; + } +*/ + + // Check status of current page is ok for writing + if (_page_cb_arr[_pg_index]._state != IN_USE) + { + if (_page_cb_arr[_pg_index]._state == UNUSED) + _page_cb_arr[_pg_index]._state = IN_USE; + else if 
(_page_cb_arr[_pg_index]._state == AIO_PENDING) + return RHM_IORES_PAGE_AIOWAIT; + else + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " index=" << _pg_index << " pg_state=" << _page_cb_arr[_pg_index].state_str(); + throw jexception(jerrno::JERR_WMGR_BADPGSTATE, oss.str(), "wmgr", "pre_write_check"); + } + } + + // operation-specific checks + switch (op) + { + case WMGR_ENQUEUE: + { + if (!dtokp->is_writable()) + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "pre_write_check"); + } + } + break; + case WMGR_DEQUEUE: + if (!dtokp->is_dequeueable()) + { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " op=" << _op_str[op]; + oss << " dtok_id=" << dtokp->id() << " dtok_state=" << dtokp->wstate_str(); + throw jexception(jerrno::JERR_WMGR_BADDTOKSTATE, oss.str(), "wmgr", + "pre_write_check"); + } + break; + case WMGR_ABORT: + break; + case WMGR_COMMIT: + break; + } + + return RHM_IORES_SUCCESS; +} + +void +wmgr::dequeue_check(const std::string& xid, + const uint64_t drid) +{ + // First check emap + bool found = false; + uint64_t fid; + short eres = _emap.get_pfid(drid, fid); + if (eres < enq_map::EMAP_OK) { // fail + if (eres == enq_map::EMAP_RID_NOT_FOUND) { + if (xid.size()) { + found = _tmap.data_exists(xid, drid); + } + } else if (eres == enq_map::EMAP_LOCKED) { + std::ostringstream oss; + oss << std::hex << "drid=0x" << drid; + throw jexception(jerrno::JERR_MAP_LOCKED, oss.str(), "wmgr", "dequeue_check"); + } + } else { + found = true; + } + if (!found) { + std::ostringstream oss; + oss << "jrnl=" << _jc->id() << " drid=0x" << std::hex << drid; + throw jexception(jerrno::JERR_WMGR_DEQRIDNOTENQ, oss.str(), "wmgr", "dequeue_check"); + } +} + +void +wmgr::dblk_roundup() +{ + const uint32_t xmagic = QLS_EMPTY_MAGIC; + uint32_t 
wdblks = jrec::size_blks(_cached_offset_dblks, QLS_SBLK_SIZE_DBLKS) * QLS_SBLK_SIZE_DBLKS; + while (_cached_offset_dblks < wdblks) + { +//std::cout << "^0x" << std::hex << _cached_offset_dblks << "<0x" << wdblks << std::dec << std::flush; + void* wptr = (void*)((char*)_page_ptr_arr[_pg_index] + _pg_offset_dblks * QLS_DBLK_SIZE_BYTES); + std::memcpy(wptr, (const void*)&xmagic, sizeof(xmagic)); +#ifdef QLS_CLEAN + std::memset((char*)wptr + sizeof(xmagic), QLS_CLEAN_CHAR, QLS_DBLK_SIZE_BYTES - sizeof(xmagic)); +#endif + _pg_offset_dblks++; + _cached_offset_dblks++; + } +} + +void +wmgr::rotate_page() +{ +//std::cout << "^^^^^ wmgr::rotate_page() " << status_str() << " pi=" << _pg_index; // DEBUG + if (_pg_offset_dblks >= _cache_pgsize_sblks * QLS_SBLK_SIZE_DBLKS) + { + _pg_offset_dblks = 0; + _pg_cntr++; + } + if (++_pg_index >= _cache_num_pages) + _pg_index = 0; +//std::cout << "->" << _pg_index << std::endl; // DEBUG +} + +void +wmgr::clean() { + // Clean up allocated memory here +} + +const std::string +wmgr::status_str() const +{ + std::ostringstream oss; + oss << "wmgr: pi=" << _pg_index << " pc=" << _pg_cntr; + oss << " po=" << _pg_offset_dblks << " aer=" << _aio_evt_rem; + oss << " edac=" << (_enq_busy?"T":"F") << (_deq_busy?"T":"F"); + oss << (_abort_busy?"T":"F") << (_commit_busy?"T":"F"); + oss << " ps=["; + for (int i=0; i<_cache_num_pages; i++) + { + switch (_page_cb_arr[i]._state) + { + case UNUSED: oss << "-"; break; + case IN_USE: oss << "U"; break; + case AIO_PENDING: oss << "A"; break; + default: oss << _page_cb_arr[i]._state; + } + } + oss << "] "; + return oss.str(); +} + +// static + +const char* wmgr::_op_str[] = {"enqueue", "dequeue", "abort", "commit"}; + +}}} diff --git a/qpid/cpp/src/qpid/linearstore/journal/wmgr.h b/qpid/cpp/src/qpid/linearstore/journal/wmgr.h new file mode 100644 index 0000000000..99da20bab9 --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/journal/wmgr.h @@ -0,0 +1,156 @@ +/* + * + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#ifndef QPID_LINEARSTORE_JOURNAL_WMGR_H +#define QPID_LINEARSTORE_JOURNAL_WMGR_H + +#include <deque> +#include <map> +#include "qpid/linearstore/journal/enums.h" +#include "qpid/linearstore/journal/pmgr.h" +#include <vector> + +namespace qpid { +namespace linearstore { +namespace journal { + +class LinearFileController; + +/** +* \brief Class for managing a write page cache of arbitrary size and number of pages. +* +* The write page cache works on the principle of caching the write data within a page until +* that page is either full or flushed; this initiates a single AIO write operation to store +* the data on disk. +* +* The maximum disk throughput is achieved by keeping the write operations of uniform size. +* Waiting for a page cache to fill achieves this; and in high data volume/throughput situations +* achieves the optimal disk throughput. Calling flush() forces a write of the current page cache +* no matter how full it is, and disrupts the uniformity of the write operations. This should +* normally only be done if throughput drops and there is a danger of a page of unwritten data +* waiting around for excessive time. 
+* +* The usual tradeoff between data storage latency and throughput performance applies. +*/ +class wmgr : public pmgr +{ +private: + typedef std::vector<uint64_t> fidl_t; + typedef fidl_t::iterator fidl_itr_t; + typedef std::map<std::string, fidl_t> pending_txn_map_t; + typedef pending_txn_map_t::iterator pending_txn_map_itr_t; + + LinearFileController& _lfc; ///< Linear File Controller ref + uint32_t _max_dtokpp; ///< Max data writes per page + uint32_t _max_io_wait_us; ///< Max wait in microseconds till submit + uint32_t _cached_offset_dblks; ///< Amount of unwritten data in page (dblocks) + + // TODO: Convert _enq_busy etc into a proper threadsafe lock + // TODO: Convert to enum? Are these encodes mutually exclusive? + bool _enq_busy; ///< Flag true if enqueue is in progress + bool _deq_busy; ///< Flag true if dequeue is in progress + bool _abort_busy; ///< Flag true if abort is in progress + bool _commit_busy; ///< Flag true if commit is in progress + + enum _op_type { WMGR_ENQUEUE = 0, WMGR_DEQUEUE, WMGR_ABORT, WMGR_COMMIT }; + static const char* _op_str[]; + + enq_rec _enq_rec; ///< Enqueue record used for encoding/decoding + deq_rec _deq_rec; ///< Dequeue record used for encoding/decoding + txn_rec _txn_rec; ///< Transaction record used for encoding/decoding + pending_txn_map_t _txn_pending_map; ///< Set containing xids of pending commits/aborts + +public: + wmgr(jcntl* jc, + enq_map& emap, + txn_map& tmap, + LinearFileController& lfc); + wmgr(jcntl* jc, + enq_map& emap, + txn_map& tmap, + LinearFileController& lfc, + const uint32_t max_dtokpp, + const uint32_t max_iowait_us); + virtual ~wmgr(); + + void initialize(aio_callback* const cbp, + const uint32_t wcache_pgsize_sblks, + const uint16_t wcache_num_pages, + const uint32_t max_dtokpp, + const uint32_t max_iowait_us, + std::size_t end_offset); + iores enqueue(const void* const data_buff, + const std::size_t tot_data_len, + const std::size_t this_data_len, + data_tok* dtokp, + const void* const xid_ptr, 
+ const std::size_t xid_len, + const bool tpc_flag, + const bool transient, + const bool external); + iores dequeue(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len, + const bool tpc_flag, + const bool txn_coml_commit); + iores abort(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len); + iores commit(data_tok* dtokp, + const void* const xid_ptr, + const std::size_t xid_len); + iores flush(); + int32_t get_events(timespec* const timeout, + bool flush); + bool is_txn_synced(const std::string& xid); + inline bool curr_pg_blocked() const { return _page_cb_arr[_pg_index]._state != UNUSED; } + inline uint32_t unflushed_dblks() { return _cached_offset_dblks; } + + // Debug aid + const std::string status_str() const; + +private: + void initialize(aio_callback* const cbp, + const uint32_t wcache_pgsize_sblks, + const uint16_t wcache_num_pages); + iores pre_write_check(const _op_type op, + const data_tok* const dtokp, + const std::size_t xidsize = 0, + const std::size_t dsize = 0, + const bool external = false) const; + void dequeue_check(const std::string& xid, + const uint64_t drid); + void file_header_check(const uint64_t rid, + const bool cont, + const uint32_t rec_dblks_rem); + void flush_check(iores& res, + bool& cont, + bool& done, const uint64_t rid); + iores write_flush(); + void get_next_file(); + void dblk_roundup(); + void rotate_page(); + void clean(); +}; + +}}} + +#endif // ifndef QPID_LINEARSTORE_JOURNAL_WMGR_H diff --git a/qpid/cpp/src/qpid/linearstore/management-schema.xml b/qpid/cpp/src/qpid/linearstore/management-schema.xml new file mode 100644 index 0000000000..ebd388593e --- /dev/null +++ b/qpid/cpp/src/qpid/linearstore/management-schema.xml @@ -0,0 +1,54 @@ +<schema package="org.apache.qpid.linearstore"> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + + <class name="Store"> + <property name="brokerRef" type="objId" access="RO" references="qpid.Broker" index="y" parentRef="y"/> + <property name="storeDir" type="sstr" access="RO" desc="Logical directory on disk"/> + <property name="tplIsInitialized" type="bool" access="RO" desc="Transaction prepared list has been initialized by a transactional prepare"/> + <property name="tplDirectory" type="sstr" access="RO" desc="Transaction prepared list directory"/> + <property name="tplWritePageSize" type="uint32" access="RO" unit="byte" desc="Page size in transaction prepared list write-page-cache"/> + <property name="tplWritePages" type="uint32" access="RO" unit="wpage" desc="Number of pages in transaction prepared list write-page-cache"/> + + <statistic name="tplTransactionDepth" type="hilo32" unit="txn" desc="Number of currently enqueued prepared transactions"/> + <statistic name="tplTxnPrepares" type="count64" unit="record" desc="Total transaction prepares on transaction prepared list"/> + <statistic name="tplTxnCommits" type="count64" unit="record" desc="Total transaction commits on transaction prepared list"/> + <statistic name="tplTxnAborts" type="count64" unit="record" desc="Total transaction aborts on transaction prepared list"/> + </class> + + <class name="Journal"> + <property name="queueRef" 
type="objId" access="RO" references="qpid.Queue" isGeneralReference="y"/> + <property name="queueName" type="sstr" access="RC" index="y"/> + <property name="directory" type="sstr" access="RO" desc="Directory containing journal files"/> + <property name="writePageSize" type="uint32" access="RO" unit="byte" desc="Deprecated"/> + <property name="writePages" type="uint32" access="RO" unit="wpage" desc="Deprecated"/> + + <statistic name="recordDepth" type="hilo32" unit="record" desc="Number of currently enqueued records (durable messages)"/> + <statistic name="enqueues" type="count64" unit="record" desc="Total enqueued records on journal"/> + <statistic name="dequeues" type="count64" unit="record" desc="Total dequeued records on journal"/> + <statistic name="txn" type="count32" unit="record" desc="Total open transactions (xids) on journal"/> + <statistic name="txnEnqueues" type="count64" unit="record" desc="Total transactional enqueued records on journal"/> + <statistic name="txnDequeues" type="count64" unit="record" desc="Total transactional dequeued records on journal"/> + <statistic name="txnCommits" type="count64" unit="record" desc="Total transactional commit records on journal"/> + <statistic name="txnAborts" type="count64" unit="record" desc="Total transactional abort records on journal"/> + <statistic name="outstandingAIOs" type="hilo32" unit="aio_op" desc="Number of currently outstanding AIO requests in Async IO system"/> + + </class> +</schema> |