Diffstat (limited to 'storage')
-rw-r--r--  storage/ndb/include/kernel/signaldata/BackupContinueB.hpp |   3
-rw-r--r--  storage/ndb/include/mgmapi/mgmapi_config_parameters.h     |   4
-rw-r--r--  storage/ndb/src/kernel/blocks/backup/Backup.cpp           | 175
-rw-r--r--  storage/ndb/src/kernel/blocks/backup/Backup.hpp           |  19
-rw-r--r--  storage/ndb/src/kernel/blocks/backup/BackupInit.cpp       |  22
-rw-r--r--  storage/ndb/src/mgmsrv/ConfigInfo.cpp                     |  54
-rw-r--r--  storage/ndb/src/mgmsrv/InitConfigFileParser.cpp           |  12
7 files changed, 242 insertions, 47 deletions
diff --git a/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp b/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp
index fe3f48444ec..9035c6f8140 100644
--- a/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp
+++ b/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp
@@ -32,7 +32,8 @@ private:
BUFFER_FULL_SCAN = 2,
BUFFER_FULL_FRAG_COMPLETE = 3,
BUFFER_FULL_META = 4,
- BACKUP_FRAGMENT_INFO = 5
+ BACKUP_FRAGMENT_INFO = 5,
+ RESET_DISK_SPEED_COUNTER = 6
};
};
diff --git a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h
index 78d34b31bbb..d1feaa1a7d3 100644
--- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h
+++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h
@@ -92,6 +92,10 @@
#define CFG_DB_DISK_PAGE_BUFFER_MEMORY 160
#define CFG_DB_STRING_MEMORY 161
+#define CFG_DB_DISK_SYNCH_SIZE 163
+#define CFG_DB_CHECKPOINT_SPEED 164
+#define CFG_DB_CHECKPOINT_SPEED_SR 165
+
#define CFG_DB_SGA 198 /* super pool mem */
#define CFG_DB_DATA_MEM_2 199 /* used in special build in 5.1 */
diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.cpp b/storage/ndb/src/kernel/blocks/backup/Backup.cpp
index 2e8d8b548ce..d170b3f5a6a 100644
--- a/storage/ndb/src/kernel/blocks/backup/Backup.cpp
+++ b/storage/ndb/src/kernel/blocks/backup/Backup.cpp
@@ -84,6 +84,16 @@ Backup::execSTTOR(Signal* signal)
const Uint32 startphase = signal->theData[1];
const Uint32 typeOfStart = signal->theData[7];
+ if (startphase == 1)
+ {
+ m_curr_disk_write_speed = c_defaults.m_disk_write_speed_sr;
+ m_overflow_disk_write = 0;
+ m_reset_disk_speed_time = NdbTick_CurrentMillisecond();
+ m_reset_delay_used = Backup::DISK_SPEED_CHECK_DELAY;
+ signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
+ sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal,
+ Backup::DISK_SPEED_CHECK_DELAY, 1);
+ }
if (startphase == 3) {
jam();
g_TypeOfStart = typeOfStart;
@@ -92,6 +102,11 @@ Backup::execSTTOR(Signal* signal)
return;
}//if
+ if (startphase == 7)
+ {
+ m_curr_disk_write_speed = c_defaults.m_disk_write_speed;
+ }
+
if(startphase == 7 && g_TypeOfStart == NodeState::ST_INITIAL_START &&
c_masterNodeId == getOwnNodeId()){
jam();
@@ -170,6 +185,42 @@ Backup::execCONTINUEB(Signal* signal)
const Uint32 Tdata2 = signal->theData[2];
switch(Tdata0) {
+ case BackupContinueB::RESET_DISK_SPEED_COUNTER:
+ {
+ /*
+ Adjust for up to a 10 millisecond delay of this signal. Longer
+ delays will not be handled; in that case the system is most
+ likely under too high a load and it won't matter very much that
+ we decrease the speed of checkpoints.
+
+ We use a technique where we allow an overflow write in one
+ period. This overflow will be removed from the next period
+ such that the load will on average be as specified.
+ */
+ int delay_time = m_reset_delay_used;
+ NDB_TICKS curr_time = NdbTick_CurrentMillisecond();
+ int sig_delay = curr_time - m_reset_disk_speed_time;
+
+ m_words_written_this_period = m_overflow_disk_write;
+ m_overflow_disk_write = 0;
+ m_reset_disk_speed_time = curr_time;
+
+ if (sig_delay > delay_time + 10)
+ delay_time = Backup::DISK_SPEED_CHECK_DELAY - 10;
+ else if (sig_delay < delay_time - 10)
+ delay_time = Backup::DISK_SPEED_CHECK_DELAY + 10;
+ else
+ delay_time = Backup::DISK_SPEED_CHECK_DELAY - (sig_delay - delay_time);
+ m_reset_delay_used= delay_time;
+ signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
+ sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, delay_time, 1);
+#if 0
+ ndbout << "Signal delay was = " << sig_delay;
+ ndbout << " Current time = " << curr_time << endl;
+ ndbout << " Delay time will be = " << delay_time << endl << endl;
+#endif
+ break;
+ }
case BackupContinueB::BACKUP_FRAGMENT_INFO:
{
const Uint32 ptr_I = Tdata1;
@@ -202,8 +253,8 @@ Backup::execCONTINUEB(Signal* signal)
fragInfo->FragmentNo = htonl(fragPtr_I);
fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF);
fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32);
- fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF);
- fragInfo->FilePosHigh = htonl(0 >> 32);
+ fragInfo->FilePosLow = htonl(0);
+ fragInfo->FilePosHigh = htonl(0);
filePtr.p->operation.dataBuffer.updateWritePtr(sz);
@@ -938,7 +989,7 @@ Backup::execBACKUP_REQ(Signal* signal)
return;
}//if
- if (m_diskless)
+ if (c_defaults.m_diskless)
{
sendBackupRef(senderRef, flags, signal, senderData,
BackupRef::CannotBackupDiskless);
@@ -2610,9 +2661,10 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr)
FsOpenReq::OM_WRITEONLY |
FsOpenReq::OM_TRUNCATE |
FsOpenReq::OM_CREATE |
- FsOpenReq::OM_APPEND;
+ FsOpenReq::OM_APPEND |
+ FsOpenReq::OM_AUTOSYNC;
FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
-
+ req->auto_sync_size = c_defaults.m_disk_synch_size;
/**
* Ctl file
*/
@@ -3881,6 +3933,69 @@ Backup::execFSAPPENDCONF(Signal* signal)
checkFile(signal, filePtr);
}
+/*
+ This routine handles two problems with writing to disk during local
+ checkpoints and backups. The first problem is that we need to limit
+ the writing so that we don't use too much of the CPU and disk
+ resources for backups and checkpoints. The perfect solution would be
+ a dynamic algorithm that adapts to the environment. Until that is
+ implemented we make do with an algorithm that uses a configurable
+ limit.
+
+ The second problem is that on Linux we can run into severe problems
+ if we write a great deal to disk without syncing. In the worst case
+ we can have gigabytes of data in the Linux page cache before we reach
+ the limit of how much we can write. If this happens, performance
+ drops significantly when we hit that limit, since the Linux flush
+ daemon will spend a few minutes writing the page cache out to disk.
+ To avoid this we ensure that a file never has more than a certain
+ amount of unsynced data outstanding. This limit is also
+ configurable.
+*/
+bool
+Backup::ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP)
+{
+#if 0
+ ndbout << "ready_to_write: ready = " << ready << " eof = " << eof;
+ ndbout << " sz = " << sz << endl;
+ ndbout << "words this period = " << m_words_written_this_period;
+ ndbout << endl << "overflow disk write = " << m_overflow_disk_write;
+ ndbout << endl << "Current Millisecond is = ";
+ ndbout << NdbTick_CurrentMillisecond() << endl;
+#endif
+ if ((ready || eof) &&
+ m_words_written_this_period <= m_curr_disk_write_speed)
+ {
+ /*
+ We have a buffer ready to write, or we have reached end of
+ file and thus must write the last data before closing the
+ file.
+ We have already checked that we are allowed to write at this
+ moment. We only consider the history of the last 100 milliseconds.
+ What happened before that is of no interest, since a disk
+ write issued more than 100 milliseconds ago should be
+ completed by now.
+ */
+ int overflow;
+ m_words_written_this_period += sz;
+ overflow = m_words_written_this_period - m_curr_disk_write_speed;
+ if (overflow > 0)
+ m_overflow_disk_write = overflow;
+#if 0
+ ndbout << "Will write with " << endl;
+ ndbout << endl;
+#endif
+ return true;
+ }
+ else
+ {
+#if 0
+ ndbout << "Will not write now" << endl << endl;
+#endif
+ return false;
+ }
+}
+
void
Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
{
@@ -3890,35 +4005,23 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
#endif
OperationRecord & op = filePtr.p->operation;
-
- Uint32 * tmp, sz; bool eof;
- if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof))
+ Uint32 *tmp = NULL;
+ Uint32 sz = 0;
+ bool eof = FALSE;
+ bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof);
+#if 0
+ ndbout << "Ptr to data = " << hex << tmp << endl;
+#endif
+ if (!ready_to_write(ready, sz, eof, filePtr.p))
{
jam();
-
- jam();
- FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
- req->filePointer = filePtr.p->filePointer;
- req->userPointer = filePtr.i;
- req->userReference = reference();
- req->varIndex = 0;
- req->offset = tmp - c_startOfPages;
- req->size = sz;
-
- sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
- FsAppendReq::SignalLength, JBA);
- return;
- }
-
- if(!eof) {
- jam();
signal->theData[0] = BackupContinueB::BUFFER_UNDERFLOW;
signal->theData[1] = filePtr.i;
- sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 50, 2);
+ sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 20, 2);
return;
- }//if
-
- if(sz > 0) {
+ }
+ else if (sz > 0)
+ {
jam();
FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
@@ -3926,13 +4029,14 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
req->userReference = reference();
req->varIndex = 0;
req->offset = tmp - c_startOfPages;
- req->size = sz; // Round up
+ req->size = sz;
+ req->synch_flag = 0;
sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
FsAppendReq::SignalLength, JBA);
return;
- }//if
-
+ }
+
#ifdef DEBUG_ABORT
Uint32 running= filePtr.p->fileRunning;
Uint32 closing= filePtr.p->fileClosing;
@@ -4214,16 +4318,15 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
continue;
}//if
+ filePtr.p->operation.dataBuffer.eof();
if(filePtr.p->fileRunning == 1){
jam();
#ifdef DEBUG_ABORT
ndbout_c("Close files fileRunning == 1, filePtr.i=%u", filePtr.i);
#endif
- filePtr.p->operation.dataBuffer.eof();
} else {
jam();
filePtr.p->fileClosing = 1;
- filePtr.p->operation.dataBuffer.eof();
checkFile(sig, filePtr); // make sure we write everything before closing
FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend();
@@ -4712,8 +4815,10 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr)
FsOpenReq::OM_WRITEONLY |
FsOpenReq::OM_TRUNCATE |
FsOpenReq::OM_CREATE |
- FsOpenReq::OM_APPEND;
+ FsOpenReq::OM_APPEND |
+ FsOpenReq::OM_AUTOSYNC;
FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
+ req->auto_sync_size = c_defaults.m_disk_synch_size;
TablePtr tabPtr;
FragmentPtr fragPtr;
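
To make the flow above easier to follow, here is a simplified, self-contained
sketch of the same throttling idea: a word budget per 100 ms period, with any
overshoot from one period charged against the next. The struct, the driver in
main() and the numbers are illustrative only; this is not the NDB kernel code
and it ignores the signal-based scheduling used above.

    // Sketch of the per-period write throttle (illustrative, not the NDB code).
    #include <cstdint>
    #include <cstdio>

    struct WriteThrottle {
      uint32_t words_per_period;     // budget per 100 ms period
      uint32_t words_written = 0;    // consumed in the current period
      uint32_t overflow = 0;         // excess carried into the next period

      // Called every ~100 ms, mirroring RESET_DISK_SPEED_COUNTER.
      void reset_period() {
        words_written = overflow;    // start the new period already charged
        overflow = 0;
      }

      // Called before each buffer write, mirroring ready_to_write().
      bool ready_to_write(uint32_t sz, bool buffer_ready, bool eof) {
        if (!(buffer_ready || eof) || words_written > words_per_period)
          return false;              // over budget: caller retries after a delay
        words_written += sz;         // a single write may overshoot the budget...
        if (words_written > words_per_period)
          overflow = words_written - words_per_period;  // ...charge the next period
        return true;
      }
    };

    int main() {
      WriteThrottle t{262144};       // 10 MB/s => 262144 words per 100 ms
      std::printf("%d\n", t.ready_to_write(300000, true, false)); // 1: budget still open
      std::printf("%d\n", t.ready_to_write(1000, true, false));   // 0: budget exhausted
      t.reset_period();                                           // next 100 ms period
      std::printf("%d\n", t.ready_to_write(1000, true, false));   // 1: 37856 carried words, still under budget
      return 0;
    }
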
diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.hpp b/storage/ndb/src/kernel/blocks/backup/Backup.hpp
index afacf01ab2f..4f54918ed9d 100644
--- a/storage/ndb/src/kernel/blocks/backup/Backup.hpp
+++ b/storage/ndb/src/kernel/blocks/backup/Backup.hpp
@@ -33,6 +33,7 @@
#include <blocks/mutexes.hpp>
#include <NdbTCP.h>
+#include <NdbTick.h>
#include <Array.hpp>
/**
@@ -522,6 +523,11 @@ public:
Uint32 m_minWriteSize;
Uint32 m_maxWriteSize;
Uint32 m_lcp_buffer_size;
+
+ Uint32 m_disk_write_speed_sr;
+ Uint32 m_disk_write_speed;
+ Uint32 m_disk_synch_size;
+ Uint32 m_diskless;
};
/**
@@ -533,8 +539,17 @@ public:
NdbNodeBitmask c_aliveNodes;
DLList<BackupRecord> c_backups;
Config c_defaults;
- Uint32 m_diskless;
+ /*
+ Variables that control checkpoint to disk speed
+ */
+ Uint32 m_curr_disk_write_speed;
+ Uint32 m_words_written_this_period;
+ Uint32 m_overflow_disk_write;
+ Uint32 m_reset_delay_used;
+ NDB_TICKS m_reset_disk_speed_time;
+ static const int DISK_SPEED_CHECK_DELAY = 100;
+
STATIC_CONST(NO_OF_PAGES_META_FILE = MAX_WORDS_META_FILE/BACKUP_WORDS_PER_PAGE);
/**
@@ -631,6 +646,8 @@ public:
void lcp_open_file_done(Signal*, BackupRecordPtr);
void lcp_close_file_conf(Signal* signal, BackupRecordPtr);
void lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr);
+
+ bool ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP);
};
inline
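
The second problem described in Backup.cpp (bounding the amount of unsynced
data per file) is not handled by these counters; it is delegated to the file
system block through the OM_AUTOSYNC open flag and the auto_sync_size request
field set from m_disk_synch_size. As a rough illustration of what that
bookkeeping amounts to, here is a minimal sketch; it is an assumption about
the behaviour, not the NDBFS implementation.

    // Illustrative sketch: force a sync once unsynced bytes exceed a threshold.
    #include <cstdint>

    class AutoSyncFile {
      uint64_t m_auto_sync_size;          // e.g. DiskSyncSize = 4M
      uint64_t m_unsynced_bytes = 0;
      void do_write(const void*, uint32_t) { /* real file I/O omitted */ }
      void do_sync() { /* e.g. fsync(fd) */ }
    public:
      explicit AutoSyncFile(uint64_t auto_sync_size)
        : m_auto_sync_size(auto_sync_size) {}

      void append(const void* buf, uint32_t len) {
        do_write(buf, len);
        m_unsynced_bytes += len;
        if (m_unsynced_bytes >= m_auto_sync_size) {
          do_sync();                      // bound the OS page-cache backlog
          m_unsynced_bytes = 0;
        }
      }
    };
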
diff --git a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
index 38a60ac04d6..4cbe0c32a29 100644
--- a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
+++ b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
@@ -146,8 +146,28 @@ Backup::execREAD_CONFIG_REQ(Signal* signal)
m_ctx.m_config.getOwnConfigIterator();
ndbrequire(p != 0);
+ c_defaults.m_disk_write_speed = 10 * (1024 * 1024);
+ c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024);
+ c_defaults.m_disk_synch_size = 4 * (1024 * 1024);
+
Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0;
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless));
+ ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS,
+ &c_defaults.m_diskless));
+ ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR,
+ &c_defaults.m_disk_write_speed_sr);
+ ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED,
+ &c_defaults.m_disk_write_speed);
+ ndb_mgm_get_int_parameter(p, CFG_DB_DISK_SYNCH_SIZE,
+ &c_defaults.m_disk_synch_size);
+
+ /*
+ We adjust the disk write speed parameters from bytes per second to
+ words per 100 milliseconds. The disk sync size stays in bytes and is
+ passed on unchanged as auto_sync_size.
+ */
+ c_defaults.m_disk_write_speed /= (4 * 10);
+ c_defaults.m_disk_write_speed_sr /= (4 * 10);
+
ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups);
// ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, &noTables));
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_TABLE, &noTables));
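
For concreteness, with the defaults set above and assuming 4-byte words and
ten 100 ms periods per second: DiskCheckpointSpeed = 10 MB/s becomes
10 * 1024 * 1024 / (4 * 10) = 262144 words per 100 ms period, and
DiskCheckpointSpeedInRestart = 100 MB/s becomes 2621440 words per period.
DiskSyncSize (4 MB by default) is not divided; it is passed to the file
system as a byte count.
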
diff --git a/storage/ndb/src/mgmsrv/ConfigInfo.cpp b/storage/ndb/src/mgmsrv/ConfigInfo.cpp
index 6c172a29819..fb15e35ecc7 100644
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp
@@ -877,7 +877,7 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
ConfigInfo::CI_USED,
false,
ConfigInfo::CI_INT,
- "8",
+ "16",
"3",
STR_VALUE(MAX_INT_RNIL) },
@@ -952,8 +952,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
CFG_DB_LCP_DISC_PAGES_TUP_SR,
"NoOfDiskPagesToDiskDuringRestartTUP",
DB_TOKEN,
- "?",
- ConfigInfo::CI_USED,
+ "DiskCheckpointSpeedSr",
+ ConfigInfo::CI_DEPRICATED,
true,
ConfigInfo::CI_INT,
"40",
@@ -964,8 +964,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
CFG_DB_LCP_DISC_PAGES_TUP,
"NoOfDiskPagesToDiskAfterRestartTUP",
DB_TOKEN,
- "?",
- ConfigInfo::CI_USED,
+ "DiskCheckpointSpeed",
+ ConfigInfo::CI_DEPRICATED,
true,
ConfigInfo::CI_INT,
"40",
@@ -976,8 +976,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
CFG_DB_LCP_DISC_PAGES_ACC_SR,
"NoOfDiskPagesToDiskDuringRestartACC",
DB_TOKEN,
- "?",
- ConfigInfo::CI_USED,
+ "DiskCheckpointSpeedSr",
+ ConfigInfo::CI_DEPRICATED,
true,
ConfigInfo::CI_INT,
"20",
@@ -988,8 +988,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
CFG_DB_LCP_DISC_PAGES_ACC,
"NoOfDiskPagesToDiskAfterRestartACC",
DB_TOKEN,
- "?",
- ConfigInfo::CI_USED,
+ "DiskCheckpointSpeed",
+ ConfigInfo::CI_DEPRICATED,
true,
ConfigInfo::CI_INT,
"20",
@@ -1192,6 +1192,42 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
0, 0 },
{
+ CFG_DB_DISK_SYNCH_SIZE,
+ "DiskSyncSize",
+ DB_TOKEN,
+ "Data written to a file before a synch is forced",
+ ConfigInfo::CI_USED,
+ false,
+ ConfigInfo::CI_INT,
+ "4M",
+ "32k",
+ STR_VALUE(MAX_INT_RNIL) },
+
+ {
+ CFG_DB_CHECKPOINT_SPEED,
+ "DiskCheckpointSpeed",
+ DB_TOKEN,
+ "Bytes per second allowed to be written by checkpoint",
+ ConfigInfo::CI_USED,
+ false,
+ ConfigInfo::CI_INT,
+ "10M",
+ "1M",
+ STR_VALUE(MAX_INT_RNIL) },
+
+ {
+ CFG_DB_CHECKPOINT_SPEED_SR,
+ "DiskCheckpointSpeedInRestart",
+ DB_TOKEN,
+ "Bytes per second allowed to be written by checkpoint during restart",
+ ConfigInfo::CI_USED,
+ false,
+ ConfigInfo::CI_INT,
+ "100M",
+ "1M",
+ STR_VALUE(MAX_INT_RNIL) },
+
+ {
CFG_DB_BACKUP_MEM,
"BackupMemory",
DB_TOKEN,
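
For reference, the three new parameters introduced above would be set in the
cluster configuration file along these lines; the values are simply the
defaults defined in this patch, and using the [NDBD DEFAULT] section for
data-node defaults is an assumption about your config layout, not something
this diff prescribes.

    [NDBD DEFAULT]
    # Illustrative values; these are the defaults from this patch
    DiskSyncSize=4M
    DiskCheckpointSpeed=10M
    DiskCheckpointSpeedInRestart=100M
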
diff --git a/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp b/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
index 68a5f02f4c5..bf5cb9d726e 100644
--- a/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
+++ b/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
@@ -655,6 +655,18 @@ InitConfigFileParser::store_in_properties(Vector<struct my_option>& options,
m_info->getMax(ctx.m_currentInfo, fname));
return false;
}
+
+ ConfigInfo::Status status = m_info->getStatus(ctx.m_currentInfo, fname);
+ if (status == ConfigInfo::CI_DEPRICATED) {
+ const char * desc = m_info->getDescription(ctx.m_currentInfo, fname);
+ if(desc && desc[0]){
+ ctx.reportWarning("[%s] %s is depricated, use %s instead",
+ ctx.fname, fname, desc);
+ } else if (desc == 0){
+ ctx.reportWarning("[%s] %s is depricated", ctx.fname, fname);
+ }
+ }
+
if (options[i].var_type == GET_INT)
ctx.m_currentSection->put(options[i].name, (Uint32)value_int);
else