summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2011-09-16 12:35:12 +0200
committerantirez <antirez@gmail.com>2011-09-19 17:43:49 +0200
commit4c2d5f0980ba0666e075684473d6f50689c078de (patch)
tree867cd81a4b6d5cc4e50a11ec46420a5360b2bf3f
parent056c2acaea190c2cf55d78c9aac69ac166b23f45 (diff)
downloadredis-4c2d5f0980ba0666e075684473d6f50689c078de.tar.gz
postpone the AOF fsync if policy is everysec and there is a background fsync already going.
-rw-r--r--src/aof.c53
-rw-r--r--src/redis.c8
-rw-r--r--src/redis.h1
3 files changed, 54 insertions, 8 deletions
diff --git a/src/aof.c b/src/aof.c
index 85f97a84c..85330d18d 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -18,7 +18,7 @@ void aof_background_fsync(int fd) {
/* Called when the user switches from "appendonly yes" to "appendonly no"
* at runtime using the CONFIG command. */
void stopAppendOnly(void) {
- flushAppendOnlyFile();
+ flushAppendOnlyFile(1);
aof_fsync(server.appendfd);
close(server.appendfd);
@@ -63,12 +63,50 @@ int startAppendOnly(void) {
* and the only way the client socket can get a write is entering when the
* the event loop, we accumulate all the AOF writes in a memory
* buffer and write it on disk using this function just before entering
- * the event loop again. */
-void flushAppendOnlyFile(void) {
+ * the event loop again.
+ *
+ * About the 'force' argument:
+ *
+ * When the fsync policy is set to 'everysec' we may delay the flush if there
+ * is still an fsync() going on in the background thread, since for instance
+ * on Linux write(2) will be blocked by the background fsync anyway.
+ * When this happens we remember that there is some aof buffer to be
+ * flushed ASAP, and will try to do that in the serverCron() function.
+ *
+ * However if force is set to 1 we'll write regardless of the background
+ * fsync. */
+void flushAppendOnlyFile(int force) {
ssize_t nwritten;
+ int sync_in_progress = 0;
if (sdslen(server.aofbuf) == 0) return;
+ if (server.appendfsync == APPENDFSYNC_EVERYSEC)
+ sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0;
+
+ if (server.appendfsync == APPENDFSYNC_EVERYSEC && !force) {
+ /* With this append fsync policy we do background fsyncing.
+ * If the fsync is still in progress we can try to delay
+ * the write for a couple of seconds. */
+ if (sync_in_progress) {
+ if (server.aof_flush_postponed_start == 0) {
+ /* No previous write postponing, remember that we are
+ * postponing the flush and return. */
+ server.aof_flush_postponed_start = server.unixtime;
+ return;
+ } else if (server.unixtime - server.aof_flush_postponed_start < 2) {
+ /* We were already waiting for fsync to finish, but for less
+ * than two seconds this is still ok. Postpone again. */
+ return;
+ }
+ /* Otherwise fall through, and go write since we can't wait
+ * over two seconds. */
+ }
+ }
+ /* If you are following this code path, then we are going to write so
+ * reset the postponed flush sentinel to zero. */
+ server.aof_flush_postponed_start = 0;
+
/* We want to perform a single write. This should be guaranteed atomic
* at least if the filesystem we are writing is a real physical one.
* While this will save us against the server being killed I don't think
@@ -104,14 +142,15 @@ void flushAppendOnlyFile(void) {
return;
/* Perform the fsync if needed. */
- if (server.appendfsync == APPENDFSYNC_ALWAYS ||
- (server.appendfsync == APPENDFSYNC_EVERYSEC &&
- server.unixtime > server.lastfsync))
- {
+ if (server.appendfsync == APPENDFSYNC_ALWAYS) {
/* aof_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
server.lastfsync = server.unixtime;
+ } else if ((server.appendfsync == APPENDFSYNC_EVERYSEC &&
+ server.unixtime > server.lastfsync)) {
+ if (!sync_in_progress) aof_background_fsync(server.appendfd);
+ server.lastfsync = server.unixtime;
}
}
diff --git a/src/redis.c b/src/redis.c
index da894599f..7533529f6 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -647,6 +647,11 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
}
+
+ /* If we postponed an AOF buffer flush, let's try to do it every time the
+ * cron function is called. */
+ if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
+
/* Expire a few keys per cycle, only if this is a master.
* On slaves we wait for DEL operations synthesized by the master
* in order to guarantee a strict consistency. */
@@ -730,7 +735,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
}
/* Write the AOF buffer on disk */
- flushAppendOnlyFile();
+ flushAppendOnlyFile(0);
}
/* =========================== Server initialization ======================== */
@@ -815,6 +820,7 @@ void initServerConfig() {
server.lastfsync = time(NULL);
server.appendfd = -1;
server.appendseldb = -1; /* Make sure the first time will not match */
+ server.aof_flush_postponed_start = 0;
server.pidfile = zstrdup("/var/run/redis.pid");
server.dbfilename = zstrdup("dump.rdb");
server.appendfilename = zstrdup("appendonly.aof");
diff --git a/src/redis.h b/src/redis.h
index 14cdf897c..7c0da22e0 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -428,6 +428,7 @@ struct redisServer {
time_t lastfsync;
int appendfd;
int appendseldb;
+ time_t aof_flush_postponed_start;
char *pidfile;
pid_t bgsavechildpid;
pid_t bgrewritechildpid;