diff options
author | Kim van der Riet <kpvdr@apache.org> | 2014-03-06 19:07:24 +0000 |
---|---|---|
committer | Kim van der Riet <kpvdr@apache.org> | 2014-03-06 19:07:24 +0000 |
commit | 1bf2530dd1dc7327274a1d46a0480c889002b39e (patch) | |
tree | f55f341abfd8433f29798d4b53b883126df75801 | |
parent | c6164f7c6a0c605535592f280dd8ecbb82c4c5ec (diff) | |
download | qpid-python-1bf2530dd1dc7327274a1d46a0480c889002b39e.tar.gz |
QPID-5607: [linearstore] Recovery of store results in jexception 0x010b LinearFileController::getCurrentSerial() threw JERR_NULL
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1575009 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | qpid/cpp/src/qpid/linearstore/ISSUES | 88 | ||||
-rw-r--r-- | qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp | 9 | ||||
-rw-r--r-- | qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp | 20 | ||||
-rw-r--r-- | qpid/cpp/src/qpid/linearstore/journal/jerrno.h | 1 |
4 files changed, 83 insertions, 35 deletions
diff --git a/qpid/cpp/src/qpid/linearstore/ISSUES b/qpid/cpp/src/qpid/linearstore/ISSUES index 7f4d7750d0..a9908e882e 100644 --- a/qpid/cpp/src/qpid/linearstore/ISSUES +++ b/qpid/cpp/src/qpid/linearstore/ISSUES @@ -27,6 +27,7 @@ Current/pending: 5360 - Linearstore: Evaluate and rework logging to produce a consistent log output 5361 - Linearstore: No tests for linearstore functionality currently exist svn r.1564893 2014-02-05: Added tx-test-soak.sh + svn r.1564935 2014-02-05: Added license text to tx-test-soak.sh * No existing tests for linearstore: ** Basic broker-level tests for txn and non-txn recovery ** Store-level tests which check write boundary conditions @@ -34,7 +35,7 @@ Current/pending: ** Unit tests ** Basic performance tests 5362 - Linearstore: No store tools exist for examining the journals - svn r.1558888 2014-01-09: WIP checkin for linearstore version of qpid_qls_analyze. Needs testing and tidy-up. + svn r.1556888 2014-01-09: WIP checkin for linearstore version of qpid_qls_analyze. Needs testing and tidy-up. svn r.1560530 2014-01-22: Bugfixes for qpid_qls_analyze svn r.1561848 2014-01-27: Bugfixes and enhancements for qpid_qls_analyze svn r.1564808 2014-02-05: Bugfixes and enhancements for qpid_qls_analyze @@ -50,56 +51,95 @@ Current/pending: UNABLE TO REPRODUCE - but Frantizek has additional info - 1039522 Qpid crashes while recovering from linear store around apid::linearstore::journal::JournalFile::getFqFileName() including enq_rec::decode() threw JERR_JREC_BAD_RECTAIL * Possible dup of 1039525 - * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing + * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. Marked POST. 
- 1039525 Qpid crashes while recovering from linear store around apid::linearstore::journal::jexception::format including enq_rec::decode() threw JERR_JREC_BAD_REC_TAIL * Possible dup of 1039522 - * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing + * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. Marked POST. # - 1049870 [LinearStore] auto-delete property does not survive restart +# 5480 1053749 [linearstore] Recovery of store failure with "JERR_MAP_NOTFOUND: Key not found in map." error message + svn r.1564877 2014-02-05: Proposed fix + * Probability: 6 of 600 (1.0%) using tx-test-soak.sh + * If broker is started a second time after failure, it starts correctly and test completes ok. + * Problem: File is being recycled to EFP with still-locked enqueues in it (ie dequeued transactionally). + * Problem: Record alignment check writes filler records to wrong file when decoding bad record moves across a file boundary + * Test of fix failed on RHEL-7 +# - 1064181 [linearstore] Qpidd closes transactional client session&connection with async_dequeue() failed + * jexception 0x010b LinearFileController::getCurrentSerial() threw JERR_NULL +# - 1064230 [linearstore] Qpidd linearstore recovery sometimes fail to recover messages with recoverMessages() failed + * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL + * possible dup of 1063700 Fixed/closed (in commit order): =============================== Q-JIRA RHBZ Description / Comments ------ ------- ---------------------- 5357 1052518 Linearstore: Empty file recycling not functional - svn r.1545563 2013-11-26: Propsed fix + svn r.1545563 2013-11-26: Propsed fix. VERIFIED 5358 1052727 Linearstore: Checksums not implemented in record tail - svn r.1547601 2013-12-03: Propsed fix + svn r.1547601 2013-12-03: Propsed fix. 
NEEDINFO on algorithm 5387 1036071 Linearstore: Segmentation fault when deleting queue - svn r.1547641 2013-12-03: Propsed fix + svn r.1547641 2013-12-03: Propsed fix. VERIFIED 5388 1035802 Linearstore: Segmentation fault when recovering empty queue - svn r.1547921 2013-12-04: Propsed fix + svn r.1547921 2013-12-04: Propsed fix. VERIFIED NO-JIRA - Added missing Apache copyright/license text svn r.1551304 2013-12-16: Propsed fix 5425 1052445 Linearstore: Transaction Prepared List (TPL) fails with jexception 0x0402 AtomicCounter::addLimit() threw JERR_JNLF_FILEOFFSOVFL - svn r.1551361 2013-12-16: Proposed fix + svn r.1551361 2013-12-16: Proposed fix VERIFIED 5442 1039949 Linearstore: Dtx recover test fails - svn r.1552772 2013-12-20: Proposed fix + svn r.1552772 2013-12-20: Proposed fix VERIFIED 5444 1052775 Linearstore: Recovering from qpid-txtest fails with "Inconsistent TPL 2PC count" error message - svn r.1553148 2013-12-23: Proposed fix + svn r.1553148 2013-12-23: Proposed fix NEEDINFO on reproduction and testing - 1038599 [LinearStore] Abort when deleting used queue after restart CLOSED-NOTABUG 2014-01-06 5460 1051097 [linearstore] Recovery of store which contains prepared but incomplete transactions results in message loss - svn r.1556892 2014-01-09: Proposed fix + svn r.1556892 2014-01-09: Proposed fix VERIFIED 5473 1051924 [linearstore] Recovery of journal in which last logical file contains truncated record causes crash - svn r.1557620 2014-01-12: Proposed fix + svn r.1557620 2014-01-12: Proposed fix MODIFIED 5483 - [linearstore] Recovery of journal with partly written record fails with "JERR_JREC_BADRECTAIL: Invalid data record tail" error message svn r.1558589 2014-01-15: Proposed fix - * May be linked to RHBZ 1039522 - waiting for needinfo - * May be linked to RHBZ 1039525 - waiting for needinfo + * May be linked to RHBZ 1039522 - VERIFIED + * May be linked to RHBZ 1039525 - VERIFIED 5487 1054448 [linearstore] Replace use of /dev/urandom with c random 
generator calls - svn r.1558913 2014-01-16: Proposed fix - 5480 1053749 [linearstore] Recovery of store failure with "JERR_MAP_NOTFOUND: Key not found in map." error message - svn r.1564877 2014-02-05: Proposed fix - * Probability: 6 of 600 (1.0%) using tx-test-soak.sh - * If broker is started a second time after failure, it starts correctly and test completes ok. - * Problem: File is being recycled to EFP with still-locked enqueues in it (ie dequeued transactionally). - * Problem: Record alignment check writes filler records to wrong file when decoding bad record moves across a file boundary + svn r.1558913 2014-01-16: Proposed fix VERIFIED 5479 1053701 [linearstore] Using recovered store results in "JERR_JNLF_FILEOFFSOVFL: Attempted to increase submitted offset past file size. (JournalFile::submittedDblkCount)" error message * Probability: 2 of 600 (0.3%) using tx-test-soak.sh - * Fixed by checkin for QPID-5480, no longer able to reproduce. Marked POST. + * Fixed by checkin for QPID-5480, no longer able to reproduce. VERIFIED + 5603 1063700 [linearstore] broker restart fails under stress test + svn r.1574513 2014-03-05: Proposed fix. POST + * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL + +Ordered checkin list: +===================== +In order to port the linearstore changes from trunk to a branch, the following svn checkins need to be ported in order: + +no. svn r Q-JIRA RHBZ Date +--- ------- ------- -------- ---------- + 1. 1545563 5357 1052518 2013-11-26 + 2. 1547601 5358 1052727 2013-12-03 + 3. 1547641 5387 1036071 2013-12-03 + 4. 1547921 5388 1035802 2013-12-04 + 5. 1551304 NO-JIRA - 2013-12-16 + 6. 1551361 5425 1052445 2013-12-16 + 7. 1552772 5442 1039949 2013-12-20 + 8. 1553148 5444 1052775 2013-12-23 + 9. 1556888 5362 - 2014-01-09 +10. 1556892 5460 1051097 2014-01-09 +11. 1557620 5473 1051924 2014-01-12 +12. 1558589 5483 - 2014-01-15 +13. 1558592 5484 1035843 2014-01-15 +14. 1558913 5487 1054448 2014-01-16 +15. 
1560530 5362 - 2014-01-22 +16. 1561848 5362 - 2014-01-27 +17. 1564808 5362 - 2014-02-05 +18. 1564877 5480 1053749 2014-02-05 +19. 1564893 5361 - 2014-02-05 +20. 1564935 5361 - 2014-02-05 +21. 1574513 5603 1063700 2014-03-05 + +See above sections for details on these checkins. -Future: -======= +Future work: +============ * One journal file lost when queue deleted. All files except for one are recycled back to the EFP. * Complete exceptions - several exceptions thrown using jexception have no exception numbers * Investigate ability of store to detect missing journal files, especially from logical end of a journal diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp index b8d7ae63bd..59b3d1ced5 100644 --- a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp +++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp @@ -281,6 +281,9 @@ void RecoveryManager::setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr, lfcPtr->restoreEmptyFile(uninitFile); } } else { + if (initial_fid_ == 0) { + throw jexception(jerrno::JERR_RCVM_NULLFID, "RecoveryManager", "setLinearFileControllerJournals"); + } for (fileNumberMapConstItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) { (lfcPtr->*fnPtr)(i->second->journalFilePtr_, i->second->completedDblkCount_, i->first == initial_fid_); } @@ -589,11 +592,12 @@ bool RecoveryManager::getNextRecordHeader() rec_hdr_t h; bool hdr_ok = false; - uint64_t file_id = 0; - std::streampos file_pos = 0; + uint64_t file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum(); + std::streampos file_pos = inFileStream_.tellg(); while (!hdr_ok) { if (needNextFile()) { if (!getNextFile(true)) { + lastRecord(file_id, file_pos); return false; } } @@ -610,6 +614,7 @@ bool RecoveryManager::getNextRecordHeader() } else { if (needNextFile()) { if (!getNextFile(true)) { + lastRecord(file_id, file_pos); return false; } } diff --git 
a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp index 8765396b31..e176278d87 100644 --- a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp +++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp @@ -87,12 +87,13 @@ const uint32_t jerrno::JERR_WMGR_DEQRIDNOTENQ = 0x0805; const uint32_t jerrno::JERR_WMGR_BADFH = 0x0806; // class RecoveryManager -const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900; ///< Unable to open file for read -const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901; ///< Read/write stream error -const uint32_t jerrno::JERR_RCVM_READ = 0x0902; ///< Read error: no or insufficient data to read -const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903; ///< Write error -const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904; ///< Null XID when XID length non-null in header -const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905; ///< Offset is not data block (dblk)-aligned +const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900; +const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901; +const uint32_t jerrno::JERR_RCVM_READ = 0x0902; +const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903; +const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904; +const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905; +const uint32_t jerrno::JERR_RCVM_NULLFID = 0x0906; // class data_tok @@ -109,11 +110,11 @@ const uint32_t jerrno::JERR_EFP_BADPARTITIONNAME = 0x0d01; const uint32_t jerrno::JERR_EFP_BADPARTITIONDIR = 0x0d02; const uint32_t jerrno::JERR_EFP_BADEFPDIRNAME = 0x0d03; const uint32_t jerrno::JERR_EFP_NOEFP = 0x0d04; -const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05; +const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05; // Negative returns for some functions -const int32_t jerrno::AIO_TIMEOUT = -1; -const int32_t jerrno::LOCK_TAKEN = -2; +const int32_t jerrno::AIO_TIMEOUT = -1; +const int32_t jerrno::LOCK_TAKEN = -2; // static initialization fn @@ -185,6 +186,7 @@ jerrno::__init() _err_map[JERR_RCVM_WRITE] = "JERR_RCVM_WRITE: Write error"; 
_err_map[JERR_RCVM_NULLXID] = "JERR_RCVM_NULLXID: Null XID when XID length non-null in header"; _err_map[JERR_RCVM_NOTDBLKALIGNED] = "JERR_RCVM_NOTDBLKALIGNED: Offset is not data block (dblk)-aligned"; + _err_map[JERR_RCVM_NULLFID] = "JERR_RCVM_NULLFID: Null file id (FID)"; // class data_tok _err_map[JERR_DTOK_ILLEGALSTATE] = "JERR_MTOK_ILLEGALSTATE: Attempted to change to illegal state."; diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h index 62f18c1878..7c4602b6dd 100644 --- a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h +++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h @@ -111,6 +111,7 @@ namespace journal { static const uint32_t JERR_RCVM_WRITE; ///< Write error static const uint32_t JERR_RCVM_NULLXID; ///< Null XID when XID length non-null in header static const uint32_t JERR_RCVM_NOTDBLKALIGNED; ///< Offset is not data block (dblk)-aligned + static const uint32_t JERR_RCVM_NULLFID; ///< Null file ID (FID) // class data_tok static const uint32_t JERR_DTOK_ILLEGALSTATE; ///< Attempted to change to illegal state |