summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kim van der Riet <kpvdr@apache.org> 2014-03-06 19:07:24 +0000
committer Kim van der Riet <kpvdr@apache.org> 2014-03-06 19:07:24 +0000
commit 1bf2530dd1dc7327274a1d46a0480c889002b39e (patch)
tree f55f341abfd8433f29798d4b53b883126df75801
parent c6164f7c6a0c605535592f280dd8ecbb82c4c5ec (diff)
download qpid-python-1bf2530dd1dc7327274a1d46a0480c889002b39e.tar.gz
QPID-5607: [linearstore] Recovery of store results in jexception 0x010b LinearFileController::getCurrentSerial() threw JERR_NULL
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1575009 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- qpid/cpp/src/qpid/linearstore/ISSUES 88
-rw-r--r-- qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp 9
-rw-r--r-- qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp 20
-rw-r--r-- qpid/cpp/src/qpid/linearstore/journal/jerrno.h 1
4 files changed, 83 insertions, 35 deletions
diff --git a/qpid/cpp/src/qpid/linearstore/ISSUES b/qpid/cpp/src/qpid/linearstore/ISSUES
index 7f4d7750d0..a9908e882e 100644
--- a/qpid/cpp/src/qpid/linearstore/ISSUES
+++ b/qpid/cpp/src/qpid/linearstore/ISSUES
@@ -27,6 +27,7 @@ Current/pending:
5360 - Linearstore: Evaluate and rework logging to produce a consistent log output
5361 - Linearstore: No tests for linearstore functionality currently exist
svn r.1564893 2014-02-05: Added tx-test-soak.sh
+ svn r.1564935 2014-02-05: Added license text to tx-test-soak.sh
* No existing tests for linearstore:
** Basic broker-level tests for txn and non-txn recovery
** Store-level tests which check write boundary conditions
@@ -34,7 +35,7 @@ Current/pending:
** Unit tests
** Basic performance tests
5362 - Linearstore: No store tools exist for examining the journals
- svn r.1558888 2014-01-09: WIP checkin for linearstore version of qpid_qls_analyze. Needs testing and tidy-up.
+ svn r.1556888 2014-01-09: WIP checkin for linearstore version of qpid_qls_analyze. Needs testing and tidy-up.
svn r.1560530 2014-01-22: Bugfixes for qpid_qls_analyze
svn r.1561848 2014-01-27: Bugfixes and enhancements for qpid_qls_analyze
svn r.1564808 2014-02-05: Bugfixes and enhancements for qpid_qls_analyze
@@ -50,56 +51,95 @@ Current/pending:
UNABLE TO REPRODUCE - but Frantizek has additional info
- 1039522 Qpid crashes while recovering from linear store around qpid::linearstore::journal::JournalFile::getFqFileName() including enq_rec::decode() threw JERR_JREC_BADRECTAIL
* Possible dup of 1039525
- * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing
+ * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. Marked POST.
- 1039525 Qpid crashes while recovering from linear store around qpid::linearstore::journal::jexception::format including enq_rec::decode() threw JERR_JREC_BADRECTAIL
* Possible dup of 1039522
- * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing
+ * May be fixed by QPID-5483 - waiting for needinfo, recommend rebuilding with QPID-5483 fix and re-testing. Marked POST.
# - 1049870 [LinearStore] auto-delete property does not survive restart
+# 5480 1053749 [linearstore] Recovery of store failure with "JERR_MAP_NOTFOUND: Key not found in map." error message
+ svn r.1564877 2014-02-05: Proposed fix
+ * Probability: 6 of 600 (1.0%) using tx-test-soak.sh
+ * If broker is started a second time after failure, it starts correctly and test completes ok.
+ * Problem: File is being recycled to EFP with still-locked enqueues in it (ie dequeued transactionally).
+ * Problem: Record alignment check writes filler records to wrong file when decoding bad record moves across a file boundary
+ * Test of fix failed on RHEL-7
+# - 1064181 [linearstore] Qpidd closes transactional client session&connection with async_dequeue() failed
+ * jexception 0x010b LinearFileController::getCurrentSerial() threw JERR_NULL
+# - 1064230 [linearstore] Qpidd linearstore recovery sometimes fail to recover messages with recoverMessages() failed
+ * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL
+ * possible dup of 1063700
Fixed/closed (in commit order):
===============================
Q-JIRA RHBZ Description / Comments
------ ------- ----------------------
5357 1052518 Linearstore: Empty file recycling not functional
- svn r.1545563 2013-11-26: Propsed fix
+ svn r.1545563 2013-11-26: Proposed fix. VERIFIED
5358 1052727 Linearstore: Checksums not implemented in record tail
- svn r.1547601 2013-12-03: Propsed fix
+ svn r.1547601 2013-12-03: Proposed fix. NEEDINFO on algorithm
5387 1036071 Linearstore: Segmentation fault when deleting queue
- svn r.1547641 2013-12-03: Propsed fix
+ svn r.1547641 2013-12-03: Proposed fix. VERIFIED
5388 1035802 Linearstore: Segmentation fault when recovering empty queue
- svn r.1547921 2013-12-04: Propsed fix
+ svn r.1547921 2013-12-04: Proposed fix. VERIFIED
NO-JIRA - Added missing Apache copyright/license text
svn r.1551304 2013-12-16: Proposed fix
5425 1052445 Linearstore: Transaction Prepared List (TPL) fails with jexception 0x0402 AtomicCounter::addLimit() threw JERR_JNLF_FILEOFFSOVFL
- svn r.1551361 2013-12-16: Proposed fix
+ svn r.1551361 2013-12-16: Proposed fix VERIFIED
5442 1039949 Linearstore: Dtx recover test fails
- svn r.1552772 2013-12-20: Proposed fix
+ svn r.1552772 2013-12-20: Proposed fix VERIFIED
5444 1052775 Linearstore: Recovering from qpid-txtest fails with "Inconsistent TPL 2PC count" error message
- svn r.1553148 2013-12-23: Proposed fix
+ svn r.1553148 2013-12-23: Proposed fix NEEDINFO on reproduction and testing
- 1038599 [LinearStore] Abort when deleting used queue after restart
CLOSED-NOTABUG 2014-01-06
5460 1051097 [linearstore] Recovery of store which contains prepared but incomplete transactions results in message loss
- svn r.1556892 2014-01-09: Proposed fix
+ svn r.1556892 2014-01-09: Proposed fix VERIFIED
5473 1051924 [linearstore] Recovery of journal in which last logical file contains truncated record causes crash
- svn r.1557620 2014-01-12: Proposed fix
+ svn r.1557620 2014-01-12: Proposed fix MODIFIED
5483 - [linearstore] Recovery of journal with partly written record fails with "JERR_JREC_BADRECTAIL: Invalid data record tail" error message
svn r.1558589 2014-01-15: Proposed fix
- * May be linked to RHBZ 1039522 - waiting for needinfo
- * May be linked to RHBZ 1039525 - waiting for needinfo
+ * May be linked to RHBZ 1039522 - VERIFIED
+ * May be linked to RHBZ 1039525 - VERIFIED
5487 1054448 [linearstore] Replace use of /dev/urandom with c random generator calls
- svn r.1558913 2014-01-16: Proposed fix
- 5480 1053749 [linearstore] Recovery of store failure with "JERR_MAP_NOTFOUND: Key not found in map." error message
- svn r.1564877 2014-02-05: Proposed fix
- * Probability: 6 of 600 (1.0%) using tx-test-soak.sh
- * If broker is started a second time after failure, it starts correctly and test completes ok.
- * Problem: File is being recycled to EFP with still-locked enqueues in it (ie dequeued transactionally).
- * Problem: Record alignment check writes filler records to wrong file when decoding bad record moves across a file boundary
+ svn r.1558913 2014-01-16: Proposed fix VERIFIED
5479 1053701 [linearstore] Using recovered store results in "JERR_JNLF_FILEOFFSOVFL: Attempted to increase submitted offset past file size. (JournalFile::submittedDblkCount)" error message
* Probability: 2 of 600 (0.3%) using tx-test-soak.sh
- * Fixed by checkin for QPID-5480, no longer able to reproduce. Marked POST.
+ * Fixed by checkin for QPID-5480, no longer able to reproduce. VERIFIED
+ 5603 1063700 [linearstore] broker restart fails under stress test
+ svn r.1574513 2014-03-05: Proposed fix. POST
+ * jexception 0x0701 RecoveryManager::readNextRemainingRecord() threw JERR_JREC_BADRECTAIL
+
+Ordered checkin list:
+=====================
+In order to port the linearstore changes from trunk to a branch, the following svn checkins need to be ported in order:
+
+no. svn r Q-JIRA RHBZ Date
+--- ------- ------- -------- ----------
+ 1. 1545563 5357 1052518 2013-11-26
+ 2. 1547601 5358 1052727 2013-12-03
+ 3. 1547641 5387 1036071 2013-12-03
+ 4. 1547921 5388 1035802 2013-12-04
+ 5. 1551304 NO-JIRA - 2013-12-16
+ 6. 1551361 5425 1052445 2013-12-16
+ 7. 1552772 5442 1039949 2013-12-20
+ 8. 1553148 5444 1052775 2013-12-23
+ 9. 1556888 5362 - 2014-01-09
+10. 1556892 5460 1051097 2014-01-09
+11. 1557620 5473 1051924 2014-01-12
+12. 1558589 5483 - 2014-01-15
+13. 1558592 5484 1035843 2014-01-15
+14. 1558913 5487 1054448 2014-01-16
+15. 1560530 5362 - 2014-01-22
+16. 1561848 5362 - 2014-01-27
+17. 1564808 5362 - 2014-02-05
+18. 1564877 5480 1053749 2014-02-05
+19. 1564893 5361 - 2014-02-05
+20. 1564935 5361 - 2014-02-05
+21. 1574513 5603 1063700 2014-03-05
+
+See above sections for details on these checkins.
-Future:
-=======
+Future work:
+============
* One journal file lost when queue deleted. All files except for one are recycled back to the EFP.
* Complete exceptions - several exceptions thrown using jexception have no exception numbers
* Investigate ability of store to detect missing journal files, especially from logical end of a journal
diff --git a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp
index b8d7ae63bd..59b3d1ced5 100644
--- a/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp
+++ b/qpid/cpp/src/qpid/linearstore/journal/RecoveryManager.cpp
@@ -281,6 +281,9 @@ void RecoveryManager::setLinearFileControllerJournals(lfcAddJournalFileFn fnPtr,
lfcPtr->restoreEmptyFile(uninitFile);
}
} else {
+ if (initial_fid_ == 0) {
+ throw jexception(jerrno::JERR_RCVM_NULLFID, "RecoveryManager", "setLinearFileControllerJournals");
+ }
for (fileNumberMapConstItr_t i = fileNumberMap_.begin(); i != fileNumberMap_.end(); ++i) {
(lfcPtr->*fnPtr)(i->second->journalFilePtr_, i->second->completedDblkCount_, i->first == initial_fid_);
}
@@ -589,11 +592,12 @@ bool RecoveryManager::getNextRecordHeader()
rec_hdr_t h;
bool hdr_ok = false;
- uint64_t file_id = 0;
- std::streampos file_pos = 0;
+ uint64_t file_id = currentJournalFileItr_->second->journalFilePtr_->getFileSeqNum();
+ std::streampos file_pos = inFileStream_.tellg();
while (!hdr_ok) {
if (needNextFile()) {
if (!getNextFile(true)) {
+ lastRecord(file_id, file_pos);
return false;
}
}
@@ -610,6 +614,7 @@ bool RecoveryManager::getNextRecordHeader()
} else {
if (needNextFile()) {
if (!getNextFile(true)) {
+ lastRecord(file_id, file_pos);
return false;
}
}
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp
index 8765396b31..e176278d87 100644
--- a/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp
+++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.cpp
@@ -87,12 +87,13 @@ const uint32_t jerrno::JERR_WMGR_DEQRIDNOTENQ = 0x0805;
const uint32_t jerrno::JERR_WMGR_BADFH = 0x0806;
// class RecoveryManager
-const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900; ///< Unable to open file for read
-const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901; ///< Read/write stream error
-const uint32_t jerrno::JERR_RCVM_READ = 0x0902; ///< Read error: no or insufficient data to read
-const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903; ///< Write error
-const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904; ///< Null XID when XID length non-null in header
-const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905; ///< Offset is not data block (dblk)-aligned
+const uint32_t jerrno::JERR_RCVM_OPENRD = 0x0900;
+const uint32_t jerrno::JERR_RCVM_STREAMBAD = 0x0901;
+const uint32_t jerrno::JERR_RCVM_READ = 0x0902;
+const uint32_t jerrno::JERR_RCVM_WRITE = 0x0903;
+const uint32_t jerrno::JERR_RCVM_NULLXID = 0x0904;
+const uint32_t jerrno::JERR_RCVM_NOTDBLKALIGNED = 0x0905;
+const uint32_t jerrno::JERR_RCVM_NULLFID = 0x0906;
// class data_tok
@@ -109,11 +110,11 @@ const uint32_t jerrno::JERR_EFP_BADPARTITIONNAME = 0x0d01;
const uint32_t jerrno::JERR_EFP_BADPARTITIONDIR = 0x0d02;
const uint32_t jerrno::JERR_EFP_BADEFPDIRNAME = 0x0d03;
const uint32_t jerrno::JERR_EFP_NOEFP = 0x0d04;
-const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05;
+const uint32_t jerrno::JERR_EFP_EMPTY = 0x0d05;
// Negative returns for some functions
-const int32_t jerrno::AIO_TIMEOUT = -1;
-const int32_t jerrno::LOCK_TAKEN = -2;
+const int32_t jerrno::AIO_TIMEOUT = -1;
+const int32_t jerrno::LOCK_TAKEN = -2;
// static initialization fn
@@ -185,6 +186,7 @@ jerrno::__init()
_err_map[JERR_RCVM_WRITE] = "JERR_RCVM_WRITE: Write error";
_err_map[JERR_RCVM_NULLXID] = "JERR_RCVM_NULLXID: Null XID when XID length non-null in header";
_err_map[JERR_RCVM_NOTDBLKALIGNED] = "JERR_RCVM_NOTDBLKALIGNED: Offset is not data block (dblk)-aligned";
+ _err_map[JERR_RCVM_NULLFID] = "JERR_RCVM_NULLFID: Null file id (FID)";
// class data_tok
_err_map[JERR_DTOK_ILLEGALSTATE] = "JERR_MTOK_ILLEGALSTATE: Attempted to change to illegal state.";
diff --git a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h
index 62f18c1878..7c4602b6dd 100644
--- a/qpid/cpp/src/qpid/linearstore/journal/jerrno.h
+++ b/qpid/cpp/src/qpid/linearstore/journal/jerrno.h
@@ -111,6 +111,7 @@ namespace journal {
static const uint32_t JERR_RCVM_WRITE; ///< Write error
static const uint32_t JERR_RCVM_NULLXID; ///< Null XID when XID length non-null in header
static const uint32_t JERR_RCVM_NOTDBLKALIGNED; ///< Offset is not data block (dblk)-aligned
+ static const uint32_t JERR_RCVM_NULLFID; ///< Null file ID (FID)
// class data_tok
static const uint32_t JERR_DTOK_ILLEGALSTATE; ///< Attempted to change to illegal state