summaryrefslogtreecommitdiff
path: root/src/syncio.c
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2012-05-02 22:41:50 +0200
committerantirez <antirez@gmail.com>2012-05-02 22:41:50 +0200
commitaf3853c3bf6cbb4f4b75afbc56dc2ba0c17c4d5e (patch)
treebc05a18d0e43334c2fe2428e39a2b2452c8d0c12 /src/syncio.c
parent299290d3a42e92d73a22169c87aa21d2fe6e3b34 (diff)
downloadredis-af3853c3bf6cbb4f4b75afbc56dc2ba0c17c4d5e.tar.gz
syncio.c read / write functions reworked for correctness and performance.
The new implementation start reading / writing before blocking with aeWait(), likely the descriptor can accept writes or has buffered data inside and we can go faster, otherwise we get an error and wait. This change has effects on speed but also on correctness: on socket errors when we perform non blocking connect(2) write is performed ASAP and the error is returned ASAP before waiting. So the practical effect is that now a Redis slave is more available if it can not connect to the master, previously the slave continued to block on syncWrite() trying to send SYNC, and serving commands very slowly.
Diffstat (limited to 'src/syncio.c')
-rw-r--r--src/syncio.c42
1 files changed, 28 insertions, 14 deletions
diff --git a/src/syncio.c b/src/syncio.c
index b0c2969e2..0c202c9e2 100644
--- a/src/syncio.c
+++ b/src/syncio.c
@@ -42,10 +42,10 @@
#define REDIS_SYNCIO_RESOLUTION 10 /* Resolution in milliseconds */
-/* Write the specified payload to 'fd'. If writing the whole payload will be done
- * within 'timeout' milliseconds the operation succeeds and 'size' is returned.
- * Otherwise the operation fails, -1 is returned, and an unspecified partial write
- * could be performed against the file descriptor. */
+/* Write the specified payload to 'fd'. If writing the whole payload will be
+ * done within 'timeout' milliseconds the operation succeeds and 'size' is
+ * returned. Otherwise the operation fails, -1 is returned, and an unspecified
+ * partial write could be performed against the file descriptor. */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
ssize_t nwritten, ret = size;
long long start = mstime();
@@ -56,13 +56,19 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
remaining : REDIS_SYNCIO_RESOLUTION;
long long elapsed;
- if (aeWait(fd,AE_WRITABLE,wait) & AE_WRITABLE) {
- nwritten = write(fd,ptr,size);
- if (nwritten == -1) return -1;
+ /* Optimistically try to write before checking if the file descriptor
+ * is actually writable. At worst we get EAGAIN. */
+ nwritten = write(fd,ptr,size);
+ if (nwritten == -1) {
+ if (errno != EAGAIN) return -1;
+ } else {
ptr += nwritten;
size -= nwritten;
- if (size == 0) return ret;
}
+ if (size == 0) return ret;
+
+ /* Wait */
+ aeWait(fd,AE_WRITABLE,wait);
elapsed = mstime() - start;
if (elapsed >= timeout) {
errno = ETIMEDOUT;
@@ -72,8 +78,8 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
}
}
-/* Read the specified amount of bytes from 'fd'. If all the bytes are read within
- * 'timeout' milliseconds the operation succeed and 'size' is returned.
+/* Read the specified amount of bytes from 'fd'. If all the bytes are read
+ * within 'timeout' milliseconds the operation succeed and 'size' is returned.
* Otherwise the operation fails, -1 is returned, and an unspecified amount of
* data could be read from the file descriptor. */
ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) {
@@ -81,19 +87,27 @@ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) {
long long start = mstime();
long long remaining = timeout;
+ if (size == 0) return 0;
while(1) {
long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ?
remaining : REDIS_SYNCIO_RESOLUTION;
long long elapsed;
- if (aeWait(fd,AE_READABLE,wait) & AE_READABLE) {
- nread = read(fd,ptr,size);
- if (nread <= 0) return -1;
+ /* Optimistically try to read before checking if the file descriptor
+ * is actually readable. At worst we get EAGAIN. */
+ nread = read(fd,ptr,size);
+ if (nread == 0) return -1; /* short read. */
+ if (nread == -1) {
+ if (errno != EAGAIN) return -1;
+ } else {
ptr += nread;
size -= nread;
totread += nread;
- if (size == 0) return totread;
}
+ if (size == 0) return totread;
+
+ /* Wait */
+ aeWait(fd,AE_READABLE,wait);
elapsed = mstime() - start;
if (elapsed >= timeout) {
errno = ETIMEDOUT;