summaryrefslogtreecommitdiff
path: root/src/aof.c
diff options
context:
space:
mode:
author antirez <antirez@gmail.com> 2014-02-12 12:47:10 +0100
committer antirez <antirez@gmail.com> 2014-02-12 16:11:36 +0100
commit fe8352540fa5d6157648427b0651de9d5574e48d (patch)
tree a773d9c35e2d38be9e95f17c815535f3d4b2cf6a /src/aof.c
parent db6d628c3ee9f39a196026b5bb5ac47cb8551aef (diff)
download redis-fe8352540fa5d6157648427b0651de9d5574e48d.tar.gz
AOF: don't abort on write errors unless fsync is 'always'.
A system similar to the RDB write error handling is used, in which when we can't write to the AOF file, writes are no longer accepted until we are able to write again. For fsync == always we still abort on errors since there is currently no easy way to avoid replying with success to the user otherwise, and this would violate the contract with the user of only acknowledging data already secured on disk.
Diffstat (limited to 'src/aof.c')
-rw-r--r--src/aof.c80
1 files changed, 65 insertions, 15 deletions
diff --git a/src/aof.c b/src/aof.c
index d59e4061f..dbe7bfa6d 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -226,6 +226,7 @@ int startAppendOnly(void) {
*
* However if force is set to 1 we'll write regardless of the background
* fsync. */
+#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */
void flushAppendOnlyFile(int force) {
ssize_t nwritten;
int sync_in_progress = 0;
@@ -267,27 +268,76 @@ void flushAppendOnlyFile(int force) {
* or alike */
nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
if (nwritten != (signed)sdslen(server.aof_buf)) {
- /* Ooops, we are in troubles. The best thing to do for now is
- * aborting instead of giving the illusion that everything is
- * working as expected. */
+ static time_t last_write_error_log = 0;
+ int can_log = 0;
+
+ /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */
+ if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) {
+ can_log = 1;
+ last_write_error_log = server.unixtime;
+ }
+
+ /* Log the AOF write error and record the error code. */
if (nwritten == -1) {
- redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
+ if (can_log) {
+ redisLog(REDIS_WARNING,"Error writing to the AOF file: %s",
+ strerror(errno));
+ server.aof_last_write_errno = errno;
+ }
} else {
- redisLog(REDIS_WARNING,"Exiting on short write while writing to "
- "the append-only file: %s (nwritten=%ld, "
- "expected=%ld)",
- strerror(errno),
- (long)nwritten,
- (long)sdslen(server.aof_buf));
+ if (can_log) {
+ redisLog(REDIS_WARNING,"Short write while writing to "
+ "the AOF file: (nwritten=%lld, "
+ "expected=%lld)",
+ (long long)nwritten,
+ (long long)sdslen(server.aof_buf));
+ }
if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {
- redisLog(REDIS_WARNING, "Could not remove short write "
- "from the append-only file. Redis may refuse "
- "to load the AOF the next time it starts. "
- "ftruncate: %s", strerror(errno));
+ if (can_log) {
+ redisLog(REDIS_WARNING, "Could not remove short write "
+ "from the append-only file. Redis may refuse "
+ "to load the AOF the next time it starts. "
+ "ftruncate: %s", strerror(errno));
+ }
+ } else {
+ /* If the ftruncate() succeeded we can set nwritten to
+ * -1 since there is no longer partial data in the AOF. */
+ nwritten = -1;
}
+ server.aof_last_write_errno = ENOSPC;
+ }
+
+ /* Handle the AOF write error. */
+ if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
+ /* We can't recover when the fsync policy is ALWAYS since the
+ * reply for the client is already in the output buffers, and our
+ * contract with the user is that acknowledged write data is
+ * already synced to disk. */
+ redisLog(REDIS_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
+ exit(1);
+ } else {
+ /* Recover from the failed write by leaving the data in the buffer.
+ * However, set an error status to stop accepting writes as long as
+ * the error condition is not cleared. */
+ server.aof_last_write_status = REDIS_ERR;
+
+ /* Trim the sds buffer if there was a partial write, and there
+ * was no way to undo it with ftruncate(2). */
+ if (nwritten > 0) {
+ server.aof_current_size += nwritten;
+ sdsrange(server.aof_buf,nwritten,-1);
+ }
+ return; /* We'll try again on the next call... */
+ }
+ } else {
+ /* Successful write(2). If AOF was in error state, restore the
+ * OK state and log the event. */
+ if (server.aof_last_write_status == REDIS_ERR) {
+ redisLog(REDIS_WARNING,
+ "AOF write error looks solved, Redis can write again.");
+ server.aof_last_write_status = REDIS_OK;
}
- exit(1);
}
server.aof_current_size += nwritten;