diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | doc/config/lighttpd.conf | 4 | ||||
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/SConscript | 1 | ||||
-rw-r--r-- | src/chunk.c | 41 | ||||
-rw-r--r-- | src/chunk.h | 16 | ||||
-rw-r--r-- | src/connections.c | 39 | ||||
-rw-r--r-- | src/network.c | 37 | ||||
-rw-r--r-- | src/network_backends.h | 98 | ||||
-rw-r--r-- | src/network_freebsd_sendfile.c | 231 | ||||
-rw-r--r-- | src/network_linux_sendfile.c | 258 | ||||
-rw-r--r-- | src/network_openssl.c | 350 | ||||
-rw-r--r-- | src/network_solaris_sendfilev.c | 234 | ||||
-rw-r--r-- | src/network_write.c | 267 | ||||
-rw-r--r-- | src/network_write_mmap.c | 157 | ||||
-rw-r--r-- | src/network_write_no_mmap.c | 135 | ||||
-rw-r--r-- | src/network_writev.c | 366 | ||||
-rw-r--r-- | src/sys-mmap.h | 30 |
19 files changed, 869 insertions, 1398 deletions
@@ -9,6 +9,7 @@ NEWS * fix out-of-filedescriptors when uploading "large" files (fixes #2660, thx rmilecki) * increase upload temporary chunk file size from 1MB to 16MB * fix undefined integer shift + * rewrite network sendfile/mmap/writev/write backends - 1.4.36 - 2015-07-26 * use keep-alive timeout while waiting for HTTP headers; use always the read timeout while waiting for the HTTP body diff --git a/doc/config/lighttpd.conf b/doc/config/lighttpd.conf index 60b0ae1e..68cfea94 100644 --- a/doc/config/lighttpd.conf +++ b/doc/config/lighttpd.conf @@ -185,10 +185,10 @@ server.event-handler = "linux-sysepoll" ## and write(). Every modern OS provides its own syscall to help network ## servers transfer files as fast as possible ## -## linux-sendfile - is recommended for small files. +## sendfile - is recommended for small files. ## writev - is recommended for sending many large files ## -server.network-backend = "linux-sendfile" +server.network-backend = "sendfile" ## ## As lighttpd is a single-threaded server, its main resource limit is diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0d85b741..d45d14b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -436,6 +436,7 @@ SET(COMMON_SRC configfile-glue.c http-header-glue.c splaytree.c network_writev.c + network_write_mmap.c network_write_no_mmap.c network_write.c network_linux_sendfile.c network_freebsd_sendfile.c network_solaris_sendfilev.c network_openssl.c diff --git a/src/Makefile.am b/src/Makefile.am index 85367805..d33b7081 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -72,6 +72,7 @@ common_src=buffer.c log.c \ configfile-glue.c \ http-header-glue.c \ network_write.c network_linux_sendfile.c \ + network_write_mmap.c network_write_no_mmap.c \ network_freebsd_sendfile.c network_writev.c \ network_solaris_sendfilev.c network_openssl.c \ splaytree.c status_counter.c diff --git a/src/SConscript b/src/SConscript index bb507a5c..eb7c78aa 100644 --- a/src/SConscript +++ b/src/SConscript @@ -20,6 +20,7 @@ common_src = Split("buffer.c log.c \ configfile-glue.c \ http-header-glue.c \ splaytree.c network_writev.c \ + network_write_mmap.c network_write_no_mmap.c \ network_write.c network_linux_sendfile.c \ network_freebsd_sendfile.c \ network_solaris_sendfilev.c network_openssl.c \ diff --git a/src/chunk.c b/src/chunk.c index 9d797125..fef61259 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -10,7 +10,7 @@ #include <sys/types.h> #include <sys/stat.h> -#include <sys/mman.h> +#include "sys-mmap.h" #include <stdlib.h> #include <fcntl.h> @@ -579,6 +579,45 @@ int chunkqueue_is_empty(chunkqueue *cq) { return NULL == cq->first; } +void chunkqueue_mark_written(chunkqueue *cq, off_t len) { + off_t written = len; + chunk *c; + + for (c = cq->first; NULL != c; c = cq->first) { + off_t c_len = 0; + + switch (c->type) { + case MEM_CHUNK: + c_len = buffer_string_length(c->mem); + break; + case FILE_CHUNK: + c_len = c->file.length; + break; + } + force_assert(c_len >= c->offset); + c_len -= c->offset; + + if (0 == written && 0 != c_len) break; /* no more finished chunks */ + + if (written >= c_len) { /* chunk got finished */ + c->offset += c_len; + written -= c_len; + + cq->first = c->next; + if (c == cq->last) cq->last = NULL; + + chunkqueue_push_unused_chunk(cq, c); + } else { /* partial chunk */ + c->offset += written; + written = 0; + break; /* chunk not finished */ + } + } + + force_assert(0 == written); + cq->bytes_out += len; +} + void chunkqueue_remove_finished_chunks(chunkqueue *cq) { chunk *c; diff --git a/src/chunk.h b/src/chunk.h index 33b7e27b..b263a3e1 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -3,7 +3,6 @@ #include "buffer.h" #include "array.h" -#include "sys-mmap.h" typedef struct chunk { enum { MEM_CHUNK, FILE_CHUNK } type; @@ -26,11 +25,11 @@ typedef struct chunk { int is_temp; /* file is temporary and will be deleted if on cleanup */ } file; - off_t offset; /* octets sent from this chunk - the size of the chunk is either - - mem-chunk: mem->used - 1 - - file-chunk: file.length - */ + /* the size of the chunk is either: + * - mem-chunk: buffer_string_length(chunk::mem) + * - file-chunk: chunk::file.length + */ + off_t offset; /* octets sent from this chunk */ struct chunk *next; } chunk; @@ -69,6 +68,11 @@ void chunkqueue_get_memory(chunkqueue *cq, char **mem, size_t *len, size_t min_s */ void chunkqueue_use_memory(chunkqueue *cq, size_t len); +/* mark first "len" bytes as written (incrementing chunk offsets) + * and remove finished chunks + */ +void chunkqueue_mark_written(chunkqueue *cq, off_t len); + void chunkqueue_remove_finished_chunks(chunkqueue *cq); void chunkqueue_steal(chunkqueue *dest, chunkqueue *src, off_t len); diff --git a/src/connections.c b/src/connections.c index 8f26a30a..e01700a1 100644 --- a/src/connections.c +++ b/src/connections.c @@ -334,7 +334,7 @@ static int connection_handle_read(server *srv, connection *con) { chunkqueue_get_memory(con->read_queue, &mem, &mem_len, 0, 4096); len = recv(con->fd, mem, mem_len, 0); -#else +#else /* __WIN32 */ if (ioctl(con->fd, FIONREAD, &toread) || toread == 0 || toread <= 4*1024) { if (toread > MAX_READ_LIMIT) toread = MAX_READ_LIMIT; } else { @@ -343,24 +343,47 @@ static int connection_handle_read(server *srv, connection *con) { chunkqueue_get_memory(con->read_queue, &mem, &mem_len, 0, toread); len = read(con->fd, mem, mem_len); -#endif +#endif /* __WIN32 */ chunkqueue_use_memory(con->read_queue, len > 0 ? len : 0); if (len < 0) { con->is_readable = 0; - if (errno == EAGAIN) return 0; - if (errno == EINTR) { +#if defined(__WIN32) + { + int lastError = WSAGetLastError(); + switch (lastError) { + case EAGAIN: + return 0; + case EINTR: + /* we have been interrupted before we could read */ + con->is_readable = 1; + return 0; + case ECONNRESET: + /* suppress logging for this error, expected for keep-alive */ + break; + default: + log_error_write(srv, __FILE__, __LINE__, "sd", "connection closed - recv failed: ", lastError); + break; + } + } +#else /* __WIN32 */ + switch (errno) { + case EAGAIN: + return 0; + case EINTR: /* we have been interrupted before we could read */ con->is_readable = 1; return 0; - } - - if (errno != ECONNRESET) { - /* expected for keep-alive */ + case ECONNRESET: + /* suppress logging for this error, expected for keep-alive */ + break; + default: log_error_write(srv, __FILE__, __LINE__, "ssd", "connection closed - read failed: ", strerror(errno), errno); + break; } +#endif /* __WIN32 */ connection_set_state(srv, con, CON_STATE_ERROR); diff --git a/src/network.c b/src/network.c index f1c94898..24a435c2 100644 --- a/src/network.c +++ b/src/network.c @@ -518,9 +518,7 @@ typedef enum { NETWORK_BACKEND_UNSET, NETWORK_BACKEND_WRITE, NETWORK_BACKEND_WRITEV, - NETWORK_BACKEND_LINUX_SENDFILE, - NETWORK_BACKEND_FREEBSD_SENDFILE, - NETWORK_BACKEND_SOLARIS_SENDFILEV + NETWORK_BACKEND_SENDFILE, } network_backend_t; #ifdef USE_OPENSSL @@ -675,20 +673,23 @@ int network_init(server *srv) { const char *name; } network_backends[] = { /* lowest id wins */ +#if defined USE_SENDFILE + { NETWORK_BACKEND_SENDFILE, "sendfile" }, +#endif #if defined USE_LINUX_SENDFILE - { NETWORK_BACKEND_LINUX_SENDFILE, "linux-sendfile" }, + { NETWORK_BACKEND_SENDFILE, "linux-sendfile" }, #endif #if defined USE_FREEBSD_SENDFILE - { NETWORK_BACKEND_FREEBSD_SENDFILE, "freebsd-sendfile" }, + { NETWORK_BACKEND_SENDFILE, "freebsd-sendfile" }, #endif #if defined USE_SOLARIS_SENDFILEV - { NETWORK_BACKEND_SOLARIS_SENDFILEV, "solaris-sendfilev" }, + { NETWORK_BACKEND_SENDFILE, "solaris-sendfilev" }, #endif #if defined USE_WRITEV - { NETWORK_BACKEND_WRITEV, "writev" }, + { NETWORK_BACKEND_WRITEV, "writev" }, #endif - { NETWORK_BACKEND_WRITE, "write" }, - { NETWORK_BACKEND_UNSET, NULL } + { NETWORK_BACKEND_WRITE, "write" }, + { NETWORK_BACKEND_UNSET, NULL } }; #ifdef USE_OPENSSL @@ -967,24 +968,14 @@ int network_init(server *srv) { case NETWORK_BACKEND_WRITE: srv->network_backend_write = network_write_chunkqueue_write; break; -#ifdef USE_WRITEV +#if defined(USE_WRITEV) case NETWORK_BACKEND_WRITEV: srv->network_backend_write = network_write_chunkqueue_writev; break; #endif -#ifdef USE_LINUX_SENDFILE - case NETWORK_BACKEND_LINUX_SENDFILE: - srv->network_backend_write = network_write_chunkqueue_linuxsendfile; - break; -#endif -#ifdef USE_FREEBSD_SENDFILE - case NETWORK_BACKEND_FREEBSD_SENDFILE: - srv->network_backend_write = network_write_chunkqueue_freebsdsendfile; - break; -#endif -#ifdef USE_SOLARIS_SENDFILEV - case NETWORK_BACKEND_SOLARIS_SENDFILEV: - srv->network_backend_write = network_write_chunkqueue_solarissendfilev; +#if defined(USE_SENDFILE) + case NETWORK_BACKEND_SENDFILE: + srv->network_backend_write = network_write_chunkqueue_sendfile; break; #endif default: diff --git a/src/network_backends.h b/src/network_backends.h index 5813253a..2245c749 100644 --- a/src/network_backends.h +++ b/src/network_backends.h @@ -9,39 +9,46 @@ #include <sys/types.h> /* on linux 2.4.x you get either sendfile or LFS */ -#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) && defined HAVE_WRITEV && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN +#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN +# ifdef USE_SENDFILE +# error "can't have more than one sendfile implementation" +# endif +# define USE_SENDFILE "linux-sendfile" # define USE_LINUX_SENDFILE -# include <sys/sendfile.h> -# include <sys/uio.h> #endif -#if defined HAVE_SYS_UIO_H && defined HAVE_SENDFILE && defined HAVE_WRITEV && (defined(__FreeBSD__) || defined(__DragonFly__)) +#if defined HAVE_SENDFILE && (defined(__FreeBSD__) || defined(__DragonFly__)) +# ifdef USE_SENDFILE +# error "can't have more than one sendfile implementation" +# endif +# define USE_SENDFILE "freebsd-sendfile" # define USE_FREEBSD_SENDFILE -# include <sys/uio.h> #endif -#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILEV && defined HAVE_WRITEV && defined(__sun) +#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILEV && defined(__sun) +# ifdef USE_SENDFILE +# error "can't have more than one sendfile implementation" +# endif +# define USE_SENDFILE "solaris-sendfilev" # define USE_SOLARIS_SENDFILEV -# include <sys/sendfile.h> -# include <sys/uio.h> #endif +/* not supported so far +#if defined HAVE_SEND_FILE && defined(__aix) +# ifdef USE_SENDFILE +# error "can't have more than one sendfile implementation" +# endif +# define USE_SENDFILE "aix-sendfile" +# define USE_AIX_SENDFILE +#endif +*/ + #if defined HAVE_SYS_UIO_H && defined HAVE_WRITEV # define USE_WRITEV -# include <sys/uio.h> #endif #if defined HAVE_SYS_MMAN_H && defined HAVE_MMAP && defined ENABLE_MMAP # define USE_MMAP -# include <sys/mman.h> -/* NetBSD 1.3.x needs it */ -# ifndef MAP_FAILED -# define MAP_FAILED -1 -# endif -#endif - -#if defined HAVE_SYS_UIO_H && defined HAVE_WRITEV && defined HAVE_SEND_FILE && defined(__aix) -# define USE_AIX_SENDFILE #endif #include "base.h" @@ -53,12 +60,57 @@ */ int network_write_chunkqueue_write(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); -int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); -int network_write_chunkqueue_linuxsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); -int network_write_chunkqueue_freebsdsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); -int network_write_chunkqueue_solarissendfilev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); -#ifdef USE_OPENSSL + +#if defined(USE_WRITEV) +int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); /* fallback to write */ +#endif + +#if defined(USE_SENDFILE) +int network_write_chunkqueue_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes); /* fallback to write */ +#endif + +#if defined(USE_OPENSSL) int network_write_chunkqueue_openssl(server *srv, connection *con, SSL *ssl, chunkqueue *cq, off_t max_bytes); #endif +/* write next chunk(s); finished chunks are removed afterwards after successful writes. + * return values: similar as backends (0 succes, -1 error, -2 remote close, -3 try again later (EINTR/EAGAIN)) */ +/* next chunk must be MEM_CHUNK. use write()/send() */ +int network_write_mem_chunk(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes); + +#if defined(USE_WRITEV) +/* next chunk must be MEM_CHUNK. send multiple mem chunks using writev() */ +int network_writev_mem_chunks(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes); +#else +/* fallback to write()/send() */ +static inline int network_writev_mem_chunks(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + return network_write_mem_chunk(srv, con, fd, cq, p_max_bytes); +} +#endif + +/* next chunk must be FILE_CHUNK. use temporary buffer (srv->tmp_buf) to read into, then write()/send() it */ +int network_write_file_chunk_no_mmap(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes); + +#if defined(USE_MMAP) +/* next chunk must be FILE_CHUNK. send mmap()ed file with write() */ +int network_write_file_chunk_mmap(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes); +#else +/* fallback to no_mmap */ +static inline int network_write_file_chunk_mmap(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + return network_write_file_chunk_no_mmap(srv, con, fd, cq, p_max_bytes); +} +#endif + +#if defined(USE_SENDFILE) +int network_write_file_chunk_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes); +#else +/* fallback to mmap */ +static inline int network_write_file_chunk_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + return network_write_file_chunk_mmap(srv, con, fd, cq, p_max_bytes); +} +#endif + +/* next chunk must be FILE_CHUNK. return values: 0 success (=> -1 != cq->first->file.fd), -1 error */ +int network_open_file_chunk(server *srv, connection *con, chunkqueue *cq); + #endif diff --git a/src/network_freebsd_sendfile.c b/src/network_freebsd_sendfile.c index 62a024d1..c9711b69 100644 --- a/src/network_freebsd_sendfile.c +++ b/src/network_freebsd_sendfile.c @@ -1,219 +1,60 @@ #include "network_backends.h" -#ifdef USE_FREEBSD_SENDFILE +#if defined(USE_FREEBSD_SENDFILE) #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" #include <sys/types.h> #include <sys/socket.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <netinet/in.h> -#include <netinet/tcp.h> +#include <sys/uio.h> #include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <netdb.h> #include <string.h> -#include <stdlib.h> - - -#ifndef UIO_MAXIOV -# if defined(__FreeBSD__) || defined(__DragonFly__) -/* FreeBSD 4.7, 4.9 defined it in sys/uio.h only if _KERNEL is specified */ -# define UIO_MAXIOV 1024 -# endif -#endif - -int network_write_chunkqueue_freebsdsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { - chunk *c; - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; - - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; - - size_t num_chunks, i; - struct iovec chunks[UIO_MAXIOV]; - chunk *tc; - size_t num_bytes = 0; - - /* build writev list - * - * 1. limit: num_chunks < UIO_MAXIOV - * 2. limit: num_bytes < max_bytes - */ - for(num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV; num_chunks++, tc = tc->next); - - for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { - if (tc->mem->used == 0) { - chunks[i].iov_base = tc->mem->ptr; - chunks[i].iov_len = 0; - } else { - offset = tc->mem->ptr + tc->offset; - toSend = tc->mem->used - 1 - tc->offset; - - chunks[i].iov_base = offset; - - /* protect the return value of writev() */ - if (toSend > max_bytes || - (off_t) num_bytes + toSend > max_bytes) { - chunks[i].iov_len = max_bytes - num_bytes; - - num_chunks = i + 1; - break; - } else { - chunks[i].iov_len = toSend; - } - - num_bytes += toSend; - } - } - - if ((r = writev(fd, chunks, num_chunks)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case ENOTCONN: - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "writev failed:", strerror(errno), fd); - - return -1; - } - - r = 0; - } - - /* check which chunks have been written */ - cq->bytes_out += r; - max_bytes -= r; - - for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { - if (r >= (ssize_t)chunks[i].iov_len) { - /* written */ - r -= chunks[i].iov_len; - tc->offset += chunks[i].iov_len; - - if (chunk_finished) { - /* skip the chunks from further touches */ - c = c->next; - } else { - /* chunks_written + c = c->next is done in the for()*/ - chunk_finished = 1; - } - } else { - /* partially written */ - tc->offset += r; - chunk_finished = 0; +int network_write_file_chunk_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + off_t offset, written = 0; + off_t toSend; + int r; - break; - } - } + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); - break; - } - case FILE_CHUNK: { - off_t offset, r; - off_t toSend; - stat_cache_entry *sce = NULL; - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - log_error_write(srv, __FILE__, __LINE__, "sb", - strerror(errno), c->file.name); - return -1; - } - - offset = c->file.start + c->offset; - toSend = c->file.length - c->offset; - if (toSend > max_bytes) toSend = max_bytes; - - if (-1 == c->file.fd) { - if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno)); - - return -1; - } - - fd_close_on_exec(c->file.fd); - } - - r = 0; - - /* FreeBSD sendfile() */ - if (-1 == sendfile(c->file.fd, fd, offset, toSend, NULL, &r, 0)) { - switch(errno) { - case EAGAIN: - case EINTR: - /* for EAGAIN/EINTR r still contains the sent bytes */ - break; /* try again later */ - case EPIPE: - case ENOTCONN: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", "sendfile: ", strerror(errno), errno); - return -1; - } - } else if (r == 0) { - /* We got an event to write but we wrote nothing - * - * - the file shrinked -> error - * - the remote side closed inbetween -> remote-close */ - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - /* file is gone ? */ - return -1; - } - - if (offset >= sce->st.st_size) { - /* file shrinked, close the connection */ - return -1; - } + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + if (toSend > *p_max_bytes) toSend = *p_max_bytes; - return -2; - } - - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; - - if (c->offset == c->file.length) { - chunk_finished = 1; - } + if (0 == toSend) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } - break; - } + if (0 != network_open_file_chunk(srv, con, cq)) return -1; + + /* FreeBSD sendfile() */ + if (-1 == (r = sendfile(c->file.fd, fd, offset, toSend, NULL, &written, 0))) { + switch(errno) { + case EAGAIN: + case EINTR: + /* for EAGAIN/EINTR written still contains the sent bytes */ + break; /* try again later */ + case EPIPE: + case ENOTCONN: + return -2; default: - - log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); - + log_error_write(srv, __FILE__, __LINE__, "ssd", "sendfile: ", strerror(errno), errno); return -1; } + } - if (!chunk_finished) { - /* not finished yet */ - - break; - } + if (written >= 0) { + chunkqueue_mark_written(cq, written); + *p_max_bytes -= written; } - return 0; + return (r >= 0 && written == toSend) ? 0 : -3; } -#endif +#endif /* USE_FREEBSD_SENDFILE */ diff --git a/src/network_linux_sendfile.c b/src/network_linux_sendfile.c index b967f3cc..4e12b2db 100644 --- a/src/network_linux_sendfile.c +++ b/src/network_linux_sendfile.c @@ -1,249 +1,57 @@ #include "network_backends.h" -#ifdef USE_LINUX_SENDFILE +#if defined(USE_LINUX_SENDFILE) #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <netinet/in.h> -#include <netinet/tcp.h> +#include <sys/sendfile.h> #include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <netdb.h> #include <string.h> -#include <stdlib.h> -#include <fcntl.h> - -/* on linux 2.4.29 + debian/ubuntu we have crashes if this is enabled */ -#undef HAVE_POSIX_FADVISE - -int network_write_chunkqueue_linuxsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { - chunk *c; - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; - - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; - - size_t num_chunks, i; - struct iovec chunks[UIO_MAXIOV]; - chunk *tc; - size_t num_bytes = 0; - - /* build writev list - * - * 1. limit: num_chunks < UIO_MAXIOV - * 2. limit: num_bytes < max_bytes - */ - for (num_chunks = 0, tc = c; - tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV; - tc = tc->next, num_chunks++); - - for (tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { - if (buffer_string_is_empty(tc->mem)) { - chunks[i].iov_base = tc->mem->ptr; - chunks[i].iov_len = 0; - } else { - offset = tc->mem->ptr + tc->offset; - toSend = buffer_string_length(tc->mem) - tc->offset; - - chunks[i].iov_base = offset; - - /* protect the return value of writev() */ - if (toSend > max_bytes || - (off_t) num_bytes + toSend > max_bytes) { - chunks[i].iov_len = max_bytes - num_bytes; - num_chunks = i + 1; - break; - } else { - chunks[i].iov_len = toSend; - } +int network_write_file_chunk_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + ssize_t r; + off_t offset; + off_t toSend; - num_bytes += toSend; - } - } - - if ((r = writev(fd, chunks, num_chunks)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "writev failed:", strerror(errno), fd); - - return -1; - } - } - - /* check which chunks have been written */ - cq->bytes_out += r; - max_bytes -= r; - - for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { - if (r >= (ssize_t)chunks[i].iov_len) { - /* written */ - r -= chunks[i].iov_len; - tc->offset += chunks[i].iov_len; - - if (chunk_finished) { - /* skip the chunks from further touches */ - c = c->next; - } else { - /* chunks_written + c = c->next is done in the for()*/ - chunk_finished = 1; - } - } else { - /* partially written */ - - tc->offset += r; - chunk_finished = 0; - - break; - } - } - - break; - } - case FILE_CHUNK: { - ssize_t r; - off_t offset; - off_t toSend; - stat_cache_entry *sce = NULL; + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); - offset = c->file.start + c->offset; - toSend = c->file.length - c->offset; - if (toSend > max_bytes) toSend = max_bytes; + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + if (toSend > *p_max_bytes) toSend = *p_max_bytes; - /* open file if not already opened */ - if (-1 == c->file.fd) { - if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno)); - - return -1; - } - fd_close_on_exec(c->file.fd); -#ifdef HAVE_POSIX_FADVISE - /* tell the kernel that we want to stream the file */ - if (-1 == posix_fadvise(c->file.fd, 0, 0, POSIX_FADV_SEQUENTIAL)) { - if (ENOSYS != errno) { - log_error_write(srv, __FILE__, __LINE__, "ssd", - "posix_fadvise failed:", strerror(errno), c->file.fd); - } - } -#endif - } - - if (-1 == (r = sendfile(fd, c->file.fd, &offset, toSend))) { - switch (errno) { - case EAGAIN: - case EINTR: - /* ok, we can't send more, let's try later again */ - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "sendfile failed:", strerror(errno), fd); - return -1; - } - } else if (r == 0) { - int oerrno = errno; - /* We got an event to write but we wrote nothing - * - * - the file shrinked -> error - * - the remote side closed inbetween -> remote-close */ - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - /* file is gone ? */ - return -1; - } - - if (offset > sce->st.st_size) { - /* file shrinked, close the connection */ - errno = oerrno; - - return -1; - } - - errno = oerrno; - return -2; - } - -#ifdef HAVE_POSIX_FADVISE -#if 0 -#define K * 1024 -#define M * 1024 K -#define READ_AHEAD 4 M - /* check if we need a new chunk */ - if ((c->offset & ~(READ_AHEAD - 1)) != ((c->offset + r) & ~(READ_AHEAD - 1))) { - /* tell the kernel that we want to stream the file */ - if (-1 == posix_fadvise(c->file.fd, (c->offset + r) & ~(READ_AHEAD - 1), READ_AHEAD, POSIX_FADV_NOREUSE)) { - log_error_write(srv, __FILE__, __LINE__, "ssd", - "posix_fadvise failed:", strerror(errno), c->file.fd); - } - } -#endif -#endif - - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; - - if (c->offset == c->file.length) { - chunk_finished = 1; - - /* chunk_free() / chunk_reset() will cleanup for us but it is a ok to be faster :) */ + if (0 == toSend) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } - if (c->file.fd != -1) { - close(c->file.fd); - c->file.fd = -1; - } - } + if (0 != network_open_file_chunk(srv, con, cq)) return -1; + if (-1 == (r = sendfile(fd, c->file.fd, &offset, toSend))) { + switch (errno) { + case EAGAIN: + case EINTR: break; - } + case EPIPE: + case ECONNRESET: + return -2; default: - - log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); - + log_error_write(srv, __FILE__, __LINE__, "ssd", + "sendfile failed:", strerror(errno), fd); return -1; } + } - if (!chunk_finished) { - /* not finished yet */ - - break; - } + if (r >= 0) { + chunkqueue_mark_written(cq, r); + *p_max_bytes -= r; } - return 0; + return (r > 0 && r == toSend) ? 0 : -3; } -#endif -#if 0 -network_linuxsendfile_init(void) { - p->write = network_linuxsendfile_write_chunkset; -} -#endif +#endif /* USE_LINUX_SENDFILE */ diff --git a/src/network_openssl.c b/src/network_openssl.c index d9ae33c9..b731a1f2 100644 --- a/src/network_openssl.c +++ b/src/network_openssl.c @@ -1,36 +1,23 @@ #include "network_backends.h" -#ifdef USE_OPENSSL +#if defined(USE_OPENSSL) #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <netinet/in.h> -#include <netinet/tcp.h> +#include <unistd.h> +#include <stdlib.h> #include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <netdb.h> #include <string.h> -#include <stdlib.h> -#include <assert.h> # include <openssl/ssl.h> # include <openssl/err.h> -int network_write_chunkqueue_openssl(server *srv, connection *con, SSL *ssl, chunkqueue *cq, off_t max_bytes) { - int ssl_r; - chunk *c; +static int load_next_chunk(server *srv, connection *con, chunkqueue *cq, off_t max_bytes, const char **data, size_t *data_len) { + chunk * const c = cq->first; +#define LOCAL_SEND_BUFSIZE (64 * 1024) /* this is a 64k sendbuffer * * it has to stay at the same location all the time to satisfy the needs @@ -38,260 +25,157 @@ int network_write_chunkqueue_openssl(server *srv, connection *con, SSL *ssl, chu * * the buffer is allocated once, is NOT realloced and is NOT freed at shutdown * -> we expect a 64k block to 'leak' in valgrind - * - * - * In reality we would like to use mmap() but we don't have a guarantee that - * we get the same mmap() address for each call. On openbsd the mmap() address - * even randomized. - * That means either we keep the mmap() open or we do a read() into a - * constant buffer * */ -#define LOCAL_SEND_BUFSIZE (64 * 1024) static char *local_send_buffer = NULL; - /* the remote side closed the connection before without shutdown request - * - IE - * - wget - * if keep-alive is disabled */ - - if (con->keep_alive == 0) { - SSL_set_shutdown(ssl, SSL_RECEIVED_SHUTDOWN); - } - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; + force_assert(NULL != c); - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; + switch (c->type) { + case MEM_CHUNK: + { + size_t have; - if (buffer_string_is_empty(c->mem)) { - chunk_finished = 1; - break; - } + force_assert(c->offset >= 0 && c->offset <= (off_t)buffer_string_length(c->mem)); - offset = c->mem->ptr + c->offset; - toSend = buffer_string_length(c->mem) - c->offset; - if (toSend > max_bytes) toSend = max_bytes; - - /** - * SSL_write man-page - * - * WARNING - * When an SSL_write() operation has to be repeated because of - * SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE, it must be - * repeated with the same arguments. - * - */ - - ERR_clear_error(); - r = SSL_write(ssl, offset, toSend); - - if (con->renegotiations > 1 && con->conf.ssl_disable_client_renegotiation) { - log_error_write(srv, __FILE__, __LINE__, "s", "SSL: renegotiation initiated by client, killing connection"); - return -1; - } + have = buffer_string_length(c->mem) - c->offset; + if ((off_t) have > max_bytes) have = max_bytes; - if (r <= 0) { - unsigned long err; - - switch ((ssl_r = SSL_get_error(ssl, r))) { - case SSL_ERROR_WANT_WRITE: - break; - case SSL_ERROR_SYSCALL: - /* perhaps we have error waiting in our error-queue */ - if (0 != (err = ERR_get_error())) { - do { - log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", - ssl_r, r, - ERR_error_string(err, NULL)); - } while((err = ERR_get_error())); - } else if (r == -1) { - /* no, but we have errno */ - switch(errno) { - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL:", - ssl_r, r, errno, - strerror(errno)); - break; - } - } else { - /* neither error-queue nor errno ? */ - log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL (error):", - ssl_r, r, errno, - strerror(errno)); - } + *data = c->mem->ptr + c->offset; + *data_len = have; + } + return 0; - return -1; - case SSL_ERROR_ZERO_RETURN: - /* clean shutdown on the remote side */ + case FILE_CHUNK: + if (NULL == local_send_buffer) { + local_send_buffer = malloc(LOCAL_SEND_BUFSIZE); + force_assert(NULL != local_send_buffer); + } - if (r == 0) return -2; + if (0 != network_open_file_chunk(srv, con, cq)) return -1; - /* fall through */ - default: - while((err = ERR_get_error())) { - log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", - ssl_r, r, - ERR_error_string(err, NULL)); - } + { + off_t offset, toSend; - return -1; - } - } else { - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; - } + force_assert(c->offset >= 0 && c->offset <= c->file.length); + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; - if (c->offset == (off_t)buffer_string_length(c->mem)) { - chunk_finished = 1; - } + if (toSend > LOCAL_SEND_BUFSIZE) toSend = LOCAL_SEND_BUFSIZE; + if (toSend > max_bytes) toSend = max_bytes; - break; - } - case FILE_CHUNK: { - char *s; - ssize_t r; - stat_cache_entry *sce = NULL; - int ifd; - int write_wait = 0; - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - log_error_write(srv, __FILE__, __LINE__, "sb", - strerror(errno), c->file.name); + if (-1 == lseek(c->file.fd, offset, SEEK_SET)) { + log_error_write(srv, __FILE__, __LINE__, "ss", "lseek: ", strerror(errno)); return -1; } - - if (NULL == local_send_buffer) { - local_send_buffer = malloc(LOCAL_SEND_BUFSIZE); - force_assert(local_send_buffer); + if (-1 == (toSend = read(c->file.fd, local_send_buffer, toSend))) { + log_error_write(srv, __FILE__, __LINE__, "ss", "read: ", strerror(errno)); + return -1; } - do { - off_t offset = c->file.start + c->offset; - off_t toSend = c->file.length - c->offset; - if (toSend > max_bytes) toSend = max_bytes; + *data = local_send_buffer; + *data_len = toSend; + } + return 0; + } - if (toSend > LOCAL_SEND_BUFSIZE) toSend = LOCAL_SEND_BUFSIZE; + return -1; +} - if (-1 == (ifd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "open failed:", strerror(errno)); - return -1; - } +int network_write_chunkqueue_openssl(server *srv, connection *con, SSL *ssl, chunkqueue *cq, off_t max_bytes) { + /* the remote side closed the connection before without shutdown request + * - IE + * - wget + * if keep-alive is disabled */ + if (con->keep_alive == 0) { + SSL_set_shutdown(ssl, SSL_RECEIVED_SHUTDOWN); + } - if (-1 == lseek(ifd, offset, SEEK_SET)) { - log_error_write(srv, __FILE__, __LINE__, "ss", "lseek failed:", strerror(errno)); - close(ifd); - return -1; - } - if (-1 == (toSend = read(ifd, local_send_buffer, toSend))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "read failed:", strerror(errno)); - close(ifd); - return -1; - } + chunkqueue_remove_finished_chunks(cq); - s = local_send_buffer; + while (max_bytes > 0 && NULL != cq->first) { + const char *data; + size_t data_len; + int r; - close(ifd); + if (0 != load_next_chunk(srv, con, cq, max_bytes, &data, &data_len)) return -1; - ERR_clear_error(); - r = SSL_write(ssl, s, toSend); + /** + * SSL_write man-page + * + * WARNING + * When an SSL_write() operation has to be repeated because of + * SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE, it must be + * repeated with the same arguments. + */ - if (con->renegotiations > 1 && con->conf.ssl_disable_client_renegotiation) { - log_error_write(srv, __FILE__, __LINE__, "s", "SSL: renegotiation initiated by client, killing connection"); - return -1; - } + ERR_clear_error(); + r = SSL_write(ssl, data, data_len); - if (r <= 0) { - unsigned long err; + if (con->renegotiations > 1 && con->conf.ssl_disable_client_renegotiation) { + log_error_write(srv, __FILE__, __LINE__, "s", "SSL: renegotiation initiated by client, killing connection"); + return -1; + } - switch ((ssl_r = SSL_get_error(ssl, r))) { - case SSL_ERROR_WANT_WRITE: - write_wait = 1; - break; - case SSL_ERROR_SYSCALL: - /* perhaps we have error waiting in our error-queue */ - if (0 != (err = ERR_get_error())) { - do { - log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", - ssl_r, r, - ERR_error_string(err, NULL)); - } while((err = ERR_get_error())); - } else if (r == -1) { - /* no, but we have errno */ - switch(errno) { - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL:", - ssl_r, r, errno, - strerror(errno)); - break; - } - } else { - /* neither error-queue nor errno ? */ - log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL (error):", - ssl_r, r, errno, - strerror(errno)); - } - - return -1; - case SSL_ERROR_ZERO_RETURN: - /* clean shutdown on the remote side */ - - if (r == 0) return -2; - - /* fall thourgh */ + if (r <= 0) { + int ssl_r; + unsigned long err; + + switch ((ssl_r = SSL_get_error(ssl, r))) { + case SSL_ERROR_WANT_WRITE: + return 0; /* try again later */ + case SSL_ERROR_SYSCALL: + /* perhaps we have error waiting in our error-queue */ + if (0 != (err = ERR_get_error())) { + do { + log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", + ssl_r, r, + ERR_error_string(err, NULL)); + } while((err = ERR_get_error())); + } else if (r == -1) { + /* no, but we have errno */ + switch(errno) { + case EPIPE: + case ECONNRESET: + return -2; default: - while((err = ERR_get_error())) { - log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", - ssl_r, r, - ERR_error_string(err, NULL)); - } - - return -1; + log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL:", + ssl_r, r, errno, + strerror(errno)); + break; } } else { - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; + /* neither error-queue nor errno ? */ + log_error_write(srv, __FILE__, __LINE__, "sddds", "SSL (error):", + ssl_r, r, errno, + strerror(errno)); } + break; - if (c->offset == c->file.length) { - chunk_finished = 1; - } - } while (!chunk_finished && !write_wait && max_bytes > 0); + case SSL_ERROR_ZERO_RETURN: + /* clean shutdown on the remote side */ - break; - } - default: - log_error_write(srv, __FILE__, __LINE__, "s", "type not known"); + if (r == 0) return -2; + /* fall through */ + default: + while((err = ERR_get_error())) { + log_error_write(srv, __FILE__, __LINE__, "sdds", "SSL:", + ssl_r, r, + ERR_error_string(err, NULL)); + } + break; + } return -1; } - if (!chunk_finished) { - /* not finished yet */ + chunkqueue_mark_written(cq, r); + max_bytes -= r; - break; - } + if ((size_t) r < data_len) break; /* try again later */ } return 0; } -#endif - -#if 0 -network_openssl_init(void) { - p->write_ssl = network_openssl_write_chunkset; -} -#endif +#endif /* USE_OPENSSL */ diff --git a/src/network_solaris_sendfilev.c b/src/network_solaris_sendfilev.c index 20032006..676e122d 100644 --- a/src/network_solaris_sendfilev.c +++ b/src/network_solaris_sendfilev.c @@ -1,213 +1,71 @@ #include "network_backends.h" -#ifdef USE_SOLARIS_SENDFILEV +#if defined(USE_SOLARIS_SENDFILEV) #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <netinet/in.h> -#include <netinet/tcp.h> +#include <sys/sendfile.h> #include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <netdb.h> #include <string.h> -#include <stdlib.h> -#include <limits.h> - -#ifndef UIO_MAXIOV -# define UIO_MAXIOV IOV_MAX -#endif /** * a very simple sendfilev() interface for solaris which can be optimised a lot more * as solaris sendfilev() supports 'sending everythin in one syscall()' - * - * If you want such an interface and need the performance, just give me an account on - * a solaris box. - * - jan@kneschke.de */ +int network_write_file_chunk_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + off_t offset; + off_t toSend; + size_t written = 0; + int r; + sendfilevec_t fvec; + + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); + + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + if (toSend > *p_max_bytes) toSend = *p_max_bytes; + + if (0 == toSend) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } -int network_write_chunkqueue_solarissendfilev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { - chunk *c; - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; - - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; - - size_t num_chunks, i; - struct iovec chunks[UIO_MAXIOV]; - chunk *tc; - - size_t num_bytes = 0; - - /* we can't send more then SSIZE_MAX bytes in one chunk */ - - /* build writev list - * - * 1. limit: num_chunks < UIO_MAXIOV - * 2. limit: num_bytes < SSIZE_MAX - */ - for(num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV; num_chunks++, tc = tc->next); - - for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { - if (tc->mem->used == 0) { - chunks[i].iov_base = tc->mem->ptr; - chunks[i].iov_len = 0; - } else { - offset = tc->mem->ptr + tc->offset; - toSend = tc->mem->used - 1 - tc->offset; - - chunks[i].iov_base = offset; - - /* protect the return value of writev() */ - if (toSend > max_bytes || - (off_t) num_bytes + toSend > max_bytes) { - chunks[i].iov_len = max_bytes - num_bytes; - - num_chunks = i + 1; - break; - } else { - chunks[i].iov_len = toSend; - } - - num_bytes += toSend; - } - } - - if ((r = writev(fd, chunks, num_chunks)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "writev failed:", strerror(errno), fd); - - return -1; - } - } - - /* check which chunks have been written */ - cq->bytes_out += r; - - for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { - if (r >= (ssize_t)chunks[i].iov_len) { - /* written */ - r -= chunks[i].iov_len; - tc->offset += chunks[i].iov_len; - - if (chunk_finished) { - /* skip the chunks from further touches */ - c = c->next; - } else { - /* chunks_written + c = c->next is done in the for()*/ - chunk_finished = 1; - } - } else { - /* partially written */ - - tc->offset += r; - chunk_finished = 0; - - break; - } - } - - break; - } - case FILE_CHUNK: { - ssize_t r; - off_t offset, toSend; - size_t written; - sendfilevec_t fvec; - stat_cache_entry *sce = NULL; - int ifd; - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - log_error_write(srv, __FILE__, __LINE__, "sb", - strerror(errno), c->file.name); - return -1; - } - - offset = c->file.start + c->offset; - toSend = c->file.length - c->offset; - if (toSend > max_bytes) toSend = max_bytes; - - if (offset > sce->st.st_size) { - log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); - - return -1; - } - - if (-1 == (ifd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno)); - - return -1; - } - - fvec.sfv_fd = ifd; - fvec.sfv_flag = 0; - fvec.sfv_off = offset; - fvec.sfv_len = toSend; - - /* Solaris sendfilev() */ - if (-1 == (r = sendfilev(fd, &fvec, 1, &written))) { - if (errno != EAGAIN) { - log_error_write(srv, __FILE__, __LINE__, "ssd", "sendfile: ", strerror(errno), errno); - - close(ifd); - return -1; - } - - r = 0; - } - - close(ifd); - c->offset += written; - cq->bytes_out += written; - max_bytes -= written; - - if (c->offset == c->file.length) { - chunk_finished = 1; - } - - break; - } - default: + if (0 != network_open_file_chunk(srv, con, cq)) return -1; - log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); + fvec.sfv_fd = c->file.fd; + fvec.sfv_flag = 0; + fvec.sfv_off = offset; + fvec.sfv_len = toSend; + /* Solaris sendfilev() */ + + if (-1 == (r = sendfilev(fd, &fvec, 1, &written))) { + switch(errno) { + case EAGAIN: + case EINTR: + /* for EAGAIN/EINTR written still contains the sent bytes */ + break; /* try again later */ + case EPIPE: + case ENOTCONN: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "ssd", "sendfile: ", strerror(errno), errno); return -1; } + } - if (!chunk_finished) { - /* not finished yet */ - - break; - } + if (written >= 0) { + chunkqueue_mark_written(cq, written); + *p_max_bytes -= written; } - return 0; + return (r >= 0 && (off_t) written == toSend) ? 0 : -3; } -#endif +#endif /* USE_SOLARIS_SENDFILEV */ diff --git a/src/network_write.c b/src/network_write.c index 6a89b50c..2c61cf79 100644 --- a/src/network_write.c +++ b/src/network_write.c @@ -1,220 +1,111 @@ #include "network_backends.h" #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" #include "sys-socket.h" -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <errno.h> -#include <fcntl.h> #include <unistd.h> + +#include <errno.h> #include <string.h> -#include <stdlib.h> -#ifdef HAVE_SYS_FILIO_H -# include <sys/filio.h> -#endif +int network_write_mem_chunk(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + off_t c_len; + ssize_t r; + UNUSED(con); -#ifdef HAVE_SYS_RESOURCE_H -# include <sys/resource.h> -#endif + force_assert(NULL != c); + force_assert(MEM_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= (off_t)buffer_string_length(c->mem)); -int network_write_chunkqueue_write(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { - chunk *c; - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; - - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; - - if (buffer_string_is_empty(c->mem)) { - chunk_finished = 1; - break; - } - - offset = c->mem->ptr + c->offset; - toSend = buffer_string_length(c->mem) - c->offset; - if (toSend > max_bytes) toSend = max_bytes; - -#ifdef __WIN32 - if ((r = send(fd, offset, toSend, 0)) < 0) { - /* no error handling for windows... */ - log_error_write(srv, __FILE__, __LINE__, "ssd", "send failed: ", strerror(errno), fd); - - return -1; - } -#else - if ((r = write(fd, offset, toSend)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "write failed:", strerror(errno), fd); - - return -1; - } - } -#endif - - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; - - if (c->offset == (off_t)buffer_string_length(c->mem)) { - chunk_finished = 1; - } + c_len = buffer_string_length(c->mem) - c->offset; + if (c_len > *p_max_bytes) c_len = *p_max_bytes; + if (0 == c_len) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } + +#if defined(__WIN32) + if ((r = send(fd, c->mem->ptr + c->offset, c_len, 0)) < 0) { + int lastError = WSAGetLastError(); + switch (lastError) { + case WSAEINTR: + case WSAEWOULDBLOCK: break; + case WSAECONNRESET: + case WSAETIMEDOUT: + case WSAECONNABORTED: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "sdd", + "send failed: ", lastError, fd); + return -1; } - case FILE_CHUNK: { -#ifdef USE_MMAP - char *p = NULL; -#endif - ssize_t r; - off_t offset; - off_t toSend; - stat_cache_entry *sce = NULL; - int ifd; - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - log_error_write(srv, __FILE__, __LINE__, "sb", - strerror(errno), c->file.name); - return -1; - } - - offset = c->file.start + c->offset; - toSend = c->file.length - c->offset; - - if (toSend > max_bytes) toSend = max_bytes; - - if (offset > sce->st.st_size) { - log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); - - return -1; - } - - if (-1 == (ifd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno)); - - return -1; - } - -#ifdef USE_MMAP - if (MAP_FAILED == (p = mmap(0, sce->st.st_size, PROT_READ, MAP_SHARED, ifd, 0))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "mmap failed: ", strerror(errno)); - - close(ifd); - - return -1; - } - close(ifd); - - if ((r = write(fd, p + offset, toSend)) <= 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - munmap(p, sce->st.st_size); - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "write failed:", strerror(errno), fd); - munmap(p, sce->st.st_size); - - return -1; - } - } - - munmap(p, sce->st.st_size); -#else /* USE_MMAP */ - buffer_string_prepare_copy(srv->tmp_buf, toSend); - - if (-1 == lseek(ifd, offset, SEEK_SET)) { - log_error_write(srv, __FILE__, __LINE__, "ss", "lseek: ", strerror(errno)); - close(ifd); - return -1; - } - if (-1 == (toSend = read(ifd, srv->tmp_buf->ptr, toSend))) { - log_error_write(srv, __FILE__, __LINE__, "ss", "read: ", strerror(errno)); - close(ifd); - return -1; - } - close(ifd); - -#ifdef __WIN32 - if ((r = send(fd, srv->tmp_buf->ptr, toSend, 0)) < 0) { - /* no error handling for windows... */ - log_error_write(srv, __FILE__, __LINE__, "ssd", "send failed: ", strerror(errno), fd); - - return -1; - } + } #else /* __WIN32 */ - if ((r = write(fd, srv->tmp_buf->ptr, toSend)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "write failed:", strerror(errno), fd); - - return -1; - } - } + if ((r = write(fd, c->mem->ptr + c->offset, c_len)) < 0) { + switch (errno) { + case EAGAIN: + case EINTR: + break; + case EPIPE: + case ECONNRESET: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "ssd", + "write failed:", strerror(errno), fd); + return -1; + } + } #endif /* __WIN32 */ -#endif /* USE_MMAP */ - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; + if (r >= 0) { + *p_max_bytes -= r; + chunkqueue_mark_written(cq, r); + } - if (c->offset == c->file.length) { - chunk_finished = 1; - } + return (r > 0 && r == c_len) ? 0 : -3; +} + +int network_write_chunkqueue_write(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { + while (max_bytes > 0 && NULL != cq->first) { + int r = -1; + switch (cq->first->type) { + case MEM_CHUNK: + r = network_write_mem_chunk(srv, con, fd, cq, &max_bytes); + break; + case FILE_CHUNK: + r = network_write_file_chunk_mmap(srv, con, fd, cq, &max_bytes); break; } - default: - log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); + if (-3 == r) return 0; + if (0 != r) return r; + } - return -1; - } + return 0; +} - if (!chunk_finished) { - /* not finished yet */ +int network_write_chunkqueue_sendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { + while (max_bytes > 0 && NULL != cq->first) { + int r = -1; + switch (cq->first->type) { + case MEM_CHUNK: + r = network_writev_mem_chunks(srv, con, fd, cq, &max_bytes); + break; + case FILE_CHUNK: + r = network_write_file_chunk_sendfile(srv, con, fd, cq, &max_bytes); break; } + + if (-3 == r) return 0; + if (0 != r) return r; } return 0; } - -#if 0 -network_write_init(void) { - p->write = network_write_write_chunkset; -} -#endif diff --git a/src/network_write_mmap.c b/src/network_write_mmap.c new file mode 100644 index 00000000..9747d619 --- /dev/null +++ b/src/network_write_mmap.c @@ -0,0 +1,157 @@ +#include "network_backends.h" + +#if defined(USE_MMAP) + +#include "network.h" +#include "log.h" +#include "sys-mmap.h" + +#include <unistd.h> + +#include <errno.h> +#include <string.h> + +#if 0 +/* read mmap()ed data into local buffer */ +#define LOCAL_BUFFERING 1 +#endif + +int network_write_file_chunk_mmap(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + off_t offset, toSend, file_end; + ssize_t r; + size_t mmap_offset, mmap_avail; + const char *data; + + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); + + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + if (toSend > *p_max_bytes) toSend = *p_max_bytes; + file_end = c->file.start + c->file.length; /* offset to file end in this chunk */ + + if (0 == toSend) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } + + if (0 != network_open_file_chunk(srv, con, cq)) return -1; + + /* mmap the buffer if offset is outside old mmap area or not mapped at all */ + if (MAP_FAILED == c->file.mmap.start + || offset < c->file.mmap.offset + || offset >= (off_t)(c->file.mmap.offset + c->file.mmap.length)) { + + if (MAP_FAILED != c->file.mmap.start) { + munmap(c->file.mmap.start, c->file.mmap.length); + c->file.mmap.start = MAP_FAILED; + } + + /* Optimizations for the future: + * + * adaptive mem-mapping + * the problem: + * we mmap() the whole file. If someone has alot large files and 32bit + * machine the virtual address area will be unrun and we will have a failing + * mmap() call. + * solution: + * only mmap 16M in one chunk and move the window as soon as we have finished + * the first 8M + * + * read-ahead buffering + * the problem: + * sending out several large files in parallel trashes the read-ahead of the + * kernel leading to long wait-for-seek times. + * solutions: (increasing complexity) + * 1. use madvise + * 2. use a internal read-ahead buffer in the chunk-structure + * 3. use non-blocking IO for file-transfers + * */ + + c->file.mmap.offset = offset & ~(4095); /* align at 4kb */ + + /* all mmap()ed areas are 512kb except the last which might be smaller */ + c->file.mmap.length = 512*1024; + if (c->file.mmap.offset > file_end - (off_t)c->file.mmap.length) { + c->file.mmap.length = file_end - c->file.mmap.offset; + } + + if (MAP_FAILED == (c->file.mmap.start = mmap(NULL, c->file.mmap.length, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { + log_error_write(srv, __FILE__, __LINE__, "ssbdoo", "mmap failed:", + strerror(errno), c->file.name, c->file.fd, c->file.mmap.offset, (off_t) c->file.mmap.length); + return -1; + } + +#if defined(LOCAL_BUFFERING) + buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); +#else +# if defined(HAVE_MADVISE) + /* don't advise files < 64Kb */ + if (c->file.mmap.length > (64*1024)) { + /* darwin 7 is returning EINVAL all the time and I don't know how to + * detect this at runtime. + * + * ignore the return value for now */ + madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); + } +# endif +#endif + } + + force_assert(offset >= c->file.mmap.offset); + mmap_offset = offset - c->file.mmap.offset; + force_assert(c->file.mmap.length > mmap_offset); + mmap_avail = c->file.mmap.length - mmap_offset; + if (toSend > (off_t) mmap_avail) toSend = mmap_avail; + +#if defined(LOCAL_BUFFERING) + data = c->mem->ptr + mmap_offset; +#else + data = c->file.mmap.start + mmap_offset; +#endif + +#if defined(__WIN32) + if ((r = send(fd, data, toSend, 0)) < 0) { + int lastError = WSAGetLastError(); + switch (lastError) { + case WSAEINTR: + case WSAEWOULDBLOCK: + break; + case WSAECONNRESET: + case WSAETIMEDOUT: + case WSAECONNABORTED: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "sdd", + "send failed: ", lastError, fd); + return -1; + } + } +#else /* __WIN32 */ + if ((r = write(fd, data, toSend)) < 0) { + switch (errno) { + case EAGAIN: + case EINTR: + break; + case EPIPE: + case ECONNRESET: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "ssd", + "write failed:", strerror(errno), fd); + return -1; + } + } +#endif /* __WIN32 */ + + if (r >= 0) { + *p_max_bytes -= r; + chunkqueue_mark_written(cq, r); + } + + return (r > 0 && r == toSend) ? 0 : -3; +} + +#endif /* USE_MMAP */ diff --git a/src/network_write_no_mmap.c b/src/network_write_no_mmap.c new file mode 100644 index 00000000..f5f50cea --- /dev/null +++ b/src/network_write_no_mmap.c @@ -0,0 +1,135 @@ +#include "network_backends.h" + +#include "network.h" +#include "fdevent.h" +#include "log.h" +#include "stat_cache.h" + +#include "sys-socket.h" + +#include <sys/time.h> +#include <stdlib.h> + +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <errno.h> +#include <string.h> + +int network_open_file_chunk(server *srv, connection *con, chunkqueue *cq) { + chunk* const c = cq->first; + off_t file_size, offset, toSend; + + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); + + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + + if (-1 == c->file.fd) { + stat_cache_entry *sce = NULL; + + if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { + log_error_write(srv, __FILE__, __LINE__, "ssb", "stat-cache failed:", strerror(errno), c->file.name); + return -1; + } + + if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY|O_NOCTTY))) { + log_error_write(srv, __FILE__, __LINE__, "ssb", "open failed:", strerror(errno), c->file.name); + return -1; + } + fd_close_on_exec(c->file.fd); + + file_size = sce->st.st_size; + } else { + struct stat st; + if (-1 == fstat(c->file.fd, &st)) { + log_error_write(srv, __FILE__, __LINE__, "ss", "fstat failed:", strerror(errno)); + return -1; + } + file_size = st.st_size; + } + + if (offset > file_size || toSend > file_size || offset > file_size - toSend) { + log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); + return -1; + } + + return 0; +} + +int network_write_file_chunk_no_mmap(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + chunk* const c = cq->first; + off_t offset, toSend; + ssize_t r; + + force_assert(NULL != c); + force_assert(FILE_CHUNK == c->type); + force_assert(c->offset >= 0 && c->offset <= c->file.length); + + offset = c->file.start + c->offset; + toSend = c->file.length - c->offset; + if (toSend > 64*1024) toSend = 64*1024; /* max read 64kb in one step */ + if (toSend > *p_max_bytes) toSend = *p_max_bytes; + + if (0 == toSend) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } + + if (0 != network_open_file_chunk(srv, con, cq)) return -1; + + buffer_string_prepare_copy(srv->tmp_buf, toSend); + + if (-1 == lseek(c->file.fd, offset, SEEK_SET)) { + log_error_write(srv, __FILE__, __LINE__, "ss", "lseek: ", strerror(errno)); + return -1; + } + if (-1 == (toSend = read(c->file.fd, srv->tmp_buf->ptr, toSend))) { + log_error_write(srv, __FILE__, __LINE__, "ss", "read: ", strerror(errno)); + return -1; + } + +#if defined(__WIN32) + if ((r = send(fd, srv->tmp_buf->ptr, toSend, 0)) < 0) { + int lastError = WSAGetLastError(); + switch (lastError) { + case WSAEINTR: + case WSAEWOULDBLOCK: + break; + case WSAECONNRESET: + case WSAETIMEDOUT: + case WSAECONNABORTED: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "sdd", + "send failed: ", lastError, fd); + return -1; + } + } +#else /* __WIN32 */ + if ((r = write(fd, srv->tmp_buf->ptr, toSend)) < 0) { + switch (errno) { + case EAGAIN: + case EINTR: + break; + case EPIPE: + case ECONNRESET: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "ssd", + "write failed:", strerror(errno), fd); + return -1; + } + } +#endif /* __WIN32 */ + + if (r >= 0) { + *p_max_bytes -= r; + chunkqueue_mark_written(cq, r); + } + + return (r > 0 && r == toSend) ? 0 : -3; +} diff --git a/src/network_writev.c b/src/network_writev.c index 895336c6..121f7822 100644 --- a/src/network_writev.c +++ b/src/network_writev.c @@ -1,333 +1,119 @@ #include "network_backends.h" -#ifdef USE_WRITEV +#if defined(USE_WRITEV) #include "network.h" -#include "fdevent.h" #include "log.h" -#include "stat_cache.h" -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/uio.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <netinet/in.h> -#include <netinet/tcp.h> +#if defined(HAVE_SYS_UIO_H) +# include <sys/uio.h> +#endif #include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <netdb.h> #include <string.h> #include <stdlib.h> -#include <limits.h> -#include <stdio.h> -#include <assert.h> - -#if 0 -#define LOCAL_BUFFERING 1 -#endif #if defined(UIO_MAXIOV) -# define MAX_CHUNKS UIO_MAXIOV +# define SYS_MAX_CHUNKS UIO_MAXIOV #elif defined(IOV_MAX) /* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */ -# define MAX_CHUNKS IOV_MAX +# define SYS_MAX_CHUNKS IOV_MAX #elif defined(_XOPEN_IOV_MAX) /* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */ -# define MAX_CHUNKS _XOPEN_IOV_MAX +# define SYS_MAX_CHUNKS _XOPEN_IOV_MAX #else # error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined #endif -int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { - chunk *c; - - for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { - int chunk_finished = 0; - - switch(c->type) { - case MEM_CHUNK: { - char * offset; - off_t toSend; - ssize_t r; - - size_t num_chunks, i; - struct iovec *chunks; - chunk *tc; - size_t num_bytes = 0; - - /* build writev list - * - * 1. limit: num_chunks < MAX_CHUNKS - * 2. limit: num_bytes < max_bytes - */ - for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < MAX_CHUNKS; num_chunks++, tc = tc->next); - - chunks = calloc(num_chunks, sizeof(*chunks)); - - for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { - if (buffer_string_is_empty(tc->mem)) { - chunks[i].iov_base = tc->mem->ptr; - chunks[i].iov_len = 0; - } else { - offset = tc->mem->ptr + tc->offset; - toSend = buffer_string_length(tc->mem) - tc->offset; - - chunks[i].iov_base = offset; - - /* protect the return value of writev() */ - if (toSend > max_bytes || - (off_t) num_bytes + toSend > max_bytes) { - chunks[i].iov_len = max_bytes - num_bytes; - - num_chunks = i + 1; - break; - } else { - chunks[i].iov_len = toSend; - } - - num_bytes += toSend; - } - } - - if ((r = writev(fd, chunks, num_chunks)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - free(chunks); - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "writev failed:", strerror(errno), fd); - - free(chunks); - return -1; - } - } - - cq->bytes_out += r; - max_bytes -= r; - - /* check which chunks have been written */ - - for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { - if (r >= (ssize_t)chunks[i].iov_len) { - /* written */ - r -= chunks[i].iov_len; - tc->offset += chunks[i].iov_len; - - if (chunk_finished) { - /* skip the chunks from further touches */ - c = c->next; - } else { - /* chunks_written + c = c->next is done in the for()*/ - chunk_finished = 1; - } - } else { - /* partially written */ - - tc->offset += r; - chunk_finished = 0; - - break; - } - } - free(chunks); - - break; - } - case FILE_CHUNK: { - ssize_t r; - off_t abs_offset; - off_t toSend; - stat_cache_entry *sce = NULL; - -#define KByte * 1024 -#define MByte * 1024 KByte -#define GByte * 1024 MByte - const off_t we_want_to_mmap = 512 KByte; - char *start = NULL; - - if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { - log_error_write(srv, __FILE__, __LINE__, "sb", - strerror(errno), c->file.name); - return -1; - } - - abs_offset = c->file.start + c->offset; - - if (abs_offset > sce->st.st_size) { - log_error_write(srv, __FILE__, __LINE__, "sb", - "file was shrinked:", c->file.name); - - return -1; - } - - /* mmap the buffer - * - first mmap - * - new mmap as the we are at the end of the last one */ - if (c->file.mmap.start == MAP_FAILED || - abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) { - - /* Optimizations for the future: - * - * adaptive mem-mapping - * the problem: - * we mmap() the whole file. If someone has alot large files and 32bit - * machine the virtual address area will be unrun and we will have a failing - * mmap() call. - * solution: - * only mmap 16M in one chunk and move the window as soon as we have finished - * the first 8M - * - * read-ahead buffering - * the problem: - * sending out several large files in parallel trashes the read-ahead of the - * kernel leading to long wait-for-seek times. - * solutions: (increasing complexity) - * 1. use madvise - * 2. use a internal read-ahead buffer in the chunk-structure - * 3. use non-blocking IO for file-transfers - * */ - - /* all mmap()ed areas are 512kb expect the last which might be smaller */ - off_t we_want_to_send; - size_t to_mmap; - - /* this is a remap, move the mmap-offset */ - if (c->file.mmap.start != MAP_FAILED) { - munmap(c->file.mmap.start, c->file.mmap.length); - c->file.mmap.offset += we_want_to_mmap; - } else { - /* in case the range-offset is after the first mmap()ed area we skip the area */ - c->file.mmap.offset = 0; - - while (c->file.mmap.offset + we_want_to_mmap < c->file.start) { - c->file.mmap.offset += we_want_to_mmap; - } - } - - /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */ - we_want_to_send = c->file.length - c->offset; - to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset; - - /* we have more to send than we can mmap() at once */ - if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) { - we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset; - to_mmap = we_want_to_mmap; - } - - if (-1 == c->file.fd) { /* open the file if not already open */ - if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { - log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno)); - - return -1; - } - fd_close_on_exec(c->file.fd); - } - - if (MAP_FAILED == (c->file.mmap.start = mmap(NULL, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { - log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:", - strerror(errno), c->file.name, c->file.fd); - - return -1; - } - - c->file.mmap.length = to_mmap; -#ifdef LOCAL_BUFFERING - buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); +/* allocate iovec[MAX_CHUNKS] on stack, so pick a sane limit: + * - each entry will use 1 pointer + 1 size_t + * - 32 chunks -> 256 / 512 bytes (32-bit/64-bit pointers) + */ +#define STACK_MAX_ALLOC_CHUNKS 32 +#if SYS_MAX_CHUNKS > STACK_MAX_ALLOC_CHUNKS +# define MAX_CHUNKS STACK_MAX_ALLOC_CHUNKS #else -#ifdef HAVE_MADVISE - /* don't advise files < 64Kb */ - if (c->file.mmap.length > (64 KByte)) { - /* darwin 7 is returning EINVAL all the time and I don't know how to - * detect this at runtime.i - * - * ignore the return value for now */ - madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); - } -#endif +# define MAX_CHUNKS SYS_MAX_CHUNKS #endif - /* chunk_reset() or chunk_free() will cleanup for us */ - } +int network_writev_mem_chunks(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { + struct iovec chunks[MAX_CHUNKS]; + size_t num_chunks; + off_t max_bytes = *p_max_bytes; + off_t toSend; + ssize_t r; + UNUSED(con); - /* to_send = abs_mmap_end - abs_offset */ - toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset); + force_assert(NULL != cq->first); + force_assert(MEM_CHUNK == cq->first->type); - if (toSend < 0) { - log_error_write(srv, __FILE__, __LINE__, "soooo", - "toSend is negative:", - toSend, - c->file.mmap.length, - abs_offset, - c->file.mmap.offset); - force_assert(toSend < 0); - } + { + chunk const *c; - if (toSend > max_bytes) toSend = max_bytes; + toSend = 0; + num_chunks = 0; + for (c = cq->first; NULL != c && MEM_CHUNK == c->type && num_chunks < MAX_CHUNKS && toSend < max_bytes; c = c->next) { + size_t c_len; -#ifdef LOCAL_BUFFERING - start = c->mem->ptr; -#else - start = c->file.mmap.start; -#endif + force_assert(c->offset >= 0 && c->offset <= (off_t)buffer_string_length(c->mem)); + c_len = buffer_string_length(c->mem) - c->offset; + if (c_len > 0) { + toSend += c_len; - if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) { - switch (errno) { - case EAGAIN: - case EINTR: - r = 0; - break; - case EPIPE: - case ECONNRESET: - return -2; - default: - log_error_write(srv, __FILE__, __LINE__, "ssd", - "write failed:", strerror(errno), fd); + chunks[num_chunks].iov_base = c->mem->ptr + c->offset; + chunks[num_chunks].iov_len = c_len; - return -1; - } + ++num_chunks; } + } + } - c->offset += r; - cq->bytes_out += r; - max_bytes -= r; - - if (c->offset == c->file.length) { - chunk_finished = 1; + if (0 == num_chunks) { + chunkqueue_remove_finished_chunks(cq); + return 0; + } - /* we don't need the mmaping anymore */ - if (c->file.mmap.start != MAP_FAILED) { - munmap(c->file.mmap.start, c->file.mmap.length); - c->file.mmap.start = MAP_FAILED; - } - } + r = writev(fd, chunks, num_chunks); + + if (r < 0) switch (errno) { + case EAGAIN: + case EINTR: + break; + case EPIPE: + case ECONNRESET: + return -2; + default: + log_error_write(srv, __FILE__, __LINE__, "ssd", + "writev failed:", strerror(errno), fd); + return -1; + } - break; - } - default: + if (r >= 0) { + *p_max_bytes -= r; + chunkqueue_mark_written(cq, r); + } - log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); + return (r > 0 && r == toSend) ? 0 : -3; +} - return -1; - } +#endif /* USE_WRITEV */ - if (!chunk_finished) { - /* not finished yet */ +int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { + while (max_bytes > 0 && NULL != cq->first) { + int r = -1; + switch (cq->first->type) { + case MEM_CHUNK: + r = network_writev_mem_chunks(srv, con, fd, cq, &max_bytes); + break; + case FILE_CHUNK: + r = network_write_file_chunk_mmap(srv, con, fd, cq, &max_bytes); break; } + + if (-3 == r) return 0; + if (0 != r) return r; } return 0; } - -#endif diff --git a/src/sys-mmap.h b/src/sys-mmap.h index 94aaa19b..288ac90e 100644 --- a/src/sys-mmap.h +++ b/src/sys-mmap.h @@ -1,24 +1,22 @@ -#ifndef WIN32_MMAP_H -#define WIN32_MMAP_H +#ifndef LI_SYS_MMAP_H +#define LI_SYS_MMAP_H -#ifdef __WIN32 +#if defined(HAVE_SYS_MMAN_H) +# include <sys/mman.h> +#else /* HAVE_SYS_MMAN_H */ -#define MAP_FAILED -1 -#define PROT_SHARED 0 -#define MAP_SHARED 0 -#define PROT_READ 0 +# define PROT_SHARED 0 +# define MAP_SHARED 0 +# define PROT_READ 0 -#define mmap(a, b, c, d, e, f) (-1) -#define munmap(a, b) (-1) +# define mmap(a, b, c, d, e, f) (-1) +# define munmap(a, b) (-1) -#include <windows.h> +#endif /* HAVE_SYS_MMAN_H */ -#else -#include <sys/mman.h> - -#ifndef MAP_FAILED -#define MAP_FAILED -1 -#endif +/* NetBSD 1.3.x needs it; also make it available if mmap() is not present */ +#if !defined(MAP_FAILED) +# define MAP_FAILED ((char*)-1) #endif #endif |