summaryrefslogtreecommitdiff
path: root/src/network_write.c
blob: 6266877cef3e7470a69f98c2c3d0eede8bbd42f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
#include "first.h"

#include "network_write.h"

#include "base.h"
#include "ck.h"
#include "log.h"

#include <sys/types.h>
#include "sys-socket.h"
#include "sys-unistd.h" /* <unistd.h> */

#include <errno.h>
#include <string.h>


/* on linux 2.4.x you get either sendfile or LFS */
#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
 && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
 && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "linux-sendfile"
# define NETWORK_WRITE_USE_LINUX_SENDFILE
#endif

#if defined HAVE_SENDFILE && (defined(__FreeBSD__) || defined(__DragonFly__))
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "freebsd-sendfile"
# define NETWORK_WRITE_USE_FREEBSD_SENDFILE
#endif

#if defined HAVE_SENDFILE && defined(__APPLE__)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "darwin-sendfile"
# define NETWORK_WRITE_USE_DARWIN_SENDFILE
#endif

#if defined(__APPLE__) && defined(__MACH__)
/* sendfile() on iOS/tvOS raises SIGSYS instead of returning ENOSYS
 * https://github.com/ndfred/iperf-ios/issues/17
 * https://github.com/dotnet/runtime/pull/69436 */
#include <TargetConditionals.h> /* TARGET_OS_IPHONE, TARGET_OS_MAC */
#if TARGET_OS_IPHONE            /* iOS, tvOS, or watchOS device */
#undef NETWORK_WRITE_USE_SENDFILE
#undef NETWORK_WRITE_USE_DARWIN_SENDFILE
#endif
#endif

#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILEV && defined(__sun)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "solaris-sendfilev"
# define NETWORK_WRITE_USE_SOLARIS_SENDFILEV
#endif

/* not supported so far
#if defined HAVE_SEND_FILE && defined(__aix)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "aix-sendfile"
# define NETWORK_WRITE_USE_AIX_SENDFILE
#endif
*/

#if defined HAVE_SYS_UIO_H && defined HAVE_WRITEV
# define NETWORK_WRITE_USE_WRITEV
#endif
#ifdef _WIN32
# define NETWORK_WRITE_USE_WRITEV
#endif

#if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
#ifdef ENABLE_MMAP
# define NETWORK_WRITE_USE_MMAP
#endif
#endif


/* Translate the error left behind by a failed socket write into the
 * return codes used by the write helpers in this file:
 *   -3  interrupted / would block (try again later)
 *   -2  connection reset, aborted, timed out or broken pipe
 *   -1  any other error; the error is logged together with fd */
__attribute_cold__
static int network_write_error(int fd, log_error_st *errh) {
  #ifndef _WIN32
    const int err = errno;

    if (err == EAGAIN || err == EINTR)
        return -3;
    if (err == EPIPE || err == ECONNRESET)
        return -2;

    log_perror(errh, __FILE__, __LINE__, "write() %d", fd);
    return -1;
  #else
    const int err = WSAGetLastError();

    if (err == WSAEINTR || err == WSAEWOULDBLOCK)
        return -3;
    if (err == WSAECONNRESET || err == WSAETIMEDOUT || err == WSAECONNABORTED)
        return -2;

    log_serror(errh, __FILE__, __LINE__, "send() %d", fd);
    return -1;
  #endif
}

/* Called by the write helpers when the amount to send for the first chunk
 * came out <= 0.  Asserts that len is not negative, asks the chunkqueue to
 * drop its finished chunks, and returns 0 so the caller's loop continues. */
__attribute_cold__
static int network_remove_finished_chunks(chunkqueue * const cq, const off_t len) {
    force_assert(len >= 0); /* a negative remaining length is treated as fatal */
    chunkqueue_remove_finished_chunks(cq);
    return 0;
}

/* Thin wrapper around the platform primitive for writing len bytes of data
 * to fd: write() everywhere except _WIN32, where send() is used.
 * The primitive's return value is passed through unchanged. */
static inline
ssize_t network_write_data_len(int fd, const char *data, off_t len) {
  #ifndef _WIN32
    return write(fd, data, len);
  #else
    return send(fd, data, len, 0);
  #endif
}

/* Book-keeping shared by the write helpers after a write attempt.
 *   wr     result of the write call (negative on failure)
 *   toSend number of bytes that were offered to the write call
 * On failure the error is classified by network_write_error().
 * On success wr is subtracted from *p_max_bytes and marked written in cq;
 * returns 0 only if everything offered was written and budget remains,
 * otherwise -3 (stop writing for now). */
static int network_write_accounting(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh, const ssize_t wr, const off_t toSend) {
    if (wr < 0)
        return network_write_error(fd, errh);

    *p_max_bytes -= wr;/*(toSend > 0 if we reach this func)*/

    int rc = -3;
    if (wr == toSend && *p_max_bytes > 0)
        rc = 0;

    chunkqueue_mark_written(cq, wr);
    return rc;
}




/* write next chunk(s); finished chunks are removed after successful writes.
 * return values: similar to backends (0 success, -1 error, -2 remote close, -3 try again later (EINTR/EAGAIN)) */
/* next chunk must be MEM_CHUNK. use write()/send() */
#if !defined(NETWORK_WRITE_USE_WRITEV)
/* Write the unsent tail of the first (memory) chunk, capped at *p_max_bytes,
 * with a single write()/send() and hand the result to the accounting helper. */
static int network_write_mem_chunk(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh) {
    const chunk * const c = cq->first;

    const off_t avail = (off_t)buffer_clen(c->mem) - c->offset;
    const off_t toSend = (avail > *p_max_bytes) ? *p_max_bytes : avail;
    if (toSend <= 0)
        return network_remove_finished_chunks(cq, toSend);

    const char * const data = c->mem->ptr + c->offset;
    const ssize_t wr = network_write_data_len(fd, data, toSend);
    return network_write_accounting(fd, cq, p_max_bytes, errh, wr, toSend);
}
#endif




/* next chunk must be FILE_CHUNK. copy through a stack buffer:
 * read at most sizeof(buf) bytes of the file chunk at its current offset,
 * then write them to fd.  The file is opened first if it has no fd yet.
 * Returns -1 if opening or reading fails (or the read yields no data);
 * otherwise the result of the write accounting helper. */
__attribute_noinline__
static int network_write_file_chunk_no_mmap(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh) {
    char buf[16384]; /* max read 16kb in one step */
    chunk* const c = cq->first;

    off_t want = c->file.length - c->offset;
    if (want > *p_max_bytes)
        want = *p_max_bytes;
    if (want <= 0)
        return network_remove_finished_chunks(cq, want);
    if (want > (off_t)sizeof(buf))
        want = (off_t)sizeof(buf);

    if (c->file.fd < 0) {
        if (0 != chunkqueue_open_file_chunk(cq, errh))
            return -1;
    }

    const off_t rd = chunk_file_pread(c->file.fd, buf, want, c->offset);
    if (rd <= 0) {
        log_perror(errh, __FILE__, __LINE__, "read");/* err or unexpected EOF */
        return -1;
    }

    const ssize_t wr = network_write_data_len(fd, buf, rd);
    return network_write_accounting(fd, cq, p_max_bytes, errh, wr, rd);
}




#if defined(NETWORK_WRITE_USE_MMAP)

#include "sys-setjmp.h"

/* Callback handed to sys_setjmp_eval3(): the socket fd travels through the
 * void* argument (packed via uintptr_t) and is unpacked here before the
 * actual write. */
static off_t
network_write_setjmp_write_cb (void *fd, const void *data, off_t len)
{
    const int sockfd = (int)(uintptr_t)fd;
    return network_write_data_len(sockfd, data, len);
}

/* next chunk must be FILE_CHUNK. send mmap()ed file with write()
 *
 * Temp files, and files for which no file view can be obtained, are handed
 * to network_write_file_chunk_no_mmap() instead.  Otherwise a single write
 * is issued for the smallest of: bytes left in the chunk, *p_max_bytes, and
 * the bytes available in the view at the chunk's offset. */
static int network_write_file_chunk_mmap(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh) {
    chunk * const restrict c = cq->first;

    const chunk_file_view * restrict cfv = NULL;
    if (!c->file.is_temp)
        cfv = chunkqueue_chunk_file_view(cq->first, 0, errh);/*use default 512k block*/
    if (NULL == cfv)
        return network_write_file_chunk_no_mmap(fd, cq, p_max_bytes, errh);

    off_t toSend = c->file.length - c->offset;
    if (*p_max_bytes < toSend)
        toSend = *p_max_bytes;
    if (toSend <= 0)
        return network_remove_finished_chunks(cq, toSend);

    const off_t view_len = chunk_file_view_dlen(cfv, c->offset);
    const char * const view_ptr = chunk_file_view_dptr(cfv, c->offset);
    if (view_len < toSend)
        toSend = view_len;

    /* the write runs via sys_setjmp_eval3(); presumably this guards against
     * faults while reading the mapping -- confirm in sys-setjmp.h */
    const off_t wr = sys_setjmp_eval3(network_write_setjmp_write_cb,
                                      (void *)(uintptr_t)fd, view_ptr, toSend);
    return network_write_accounting(fd,cq,p_max_bytes,errh,(ssize_t)wr,toSend);
}

#endif /* NETWORK_WRITE_USE_MMAP */




#if defined(NETWORK_WRITE_USE_WRITEV)

#if defined(HAVE_SYS_UIO_H)
# include <sys/uio.h>
#endif

#if defined(UIO_MAXIOV)
# define SYS_MAX_CHUNKS UIO_MAXIOV
#elif defined(IOV_MAX)
/* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */
# define SYS_MAX_CHUNKS IOV_MAX
#elif defined(_XOPEN_IOV_MAX)
/* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */
# define SYS_MAX_CHUNKS _XOPEN_IOV_MAX
#elif defined(_WIN32)
# define SYS_MAX_CHUNKS 32
#else
# error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined
#endif

/* allocate iovec[MAX_CHUNKS] on stack, so pick a sane limit:
 * - each entry will use 1 pointer + 1 size_t
 * - 32 chunks -> 256 / 512 bytes (32-bit/64-bit pointers)
 */
#define STACK_MAX_ALLOC_CHUNKS 32
#if SYS_MAX_CHUNKS > STACK_MAX_ALLOC_CHUNKS
# define MAX_CHUNKS STACK_MAX_ALLOC_CHUNKS
#else
# define MAX_CHUNKS SYS_MAX_CHUNKS
#endif

#ifdef _WIN32
/* rewrite iov to WSABUF */
#define iovec _WSABUF
#define iov_len len
#define iov_base buf
#endif

/* next chunk must be MEM_CHUNK. send multiple mem chunks using writev()
 *
 * Gathers the unsent tails of consecutive memory chunks from the head of cq
 * into an iovec array (at most MAX_CHUNKS entries; gathering stops once the
 * collected total reaches *p_max_bytes) and submits them with one
 * writev()/WSASend() call.  Empty chunks are skipped. */
static int network_writev_mem_chunks(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh) {
    struct iovec iov[MAX_CHUNKS];
    size_t niov = 0;
    off_t toSend = 0;

    for (const chunk *c = cq->first; NULL != c && c->type == MEM_CHUNK; c = c->next) {
        const off_t c_len = (off_t)buffer_clen(c->mem) - c->offset;

        if (c_len < 0) /*(should not happen; trigger assert)*/
            return network_remove_finished_chunks(cq, c_len);
        if (0 == c_len)
            continue;

        iov[niov].iov_base = c->mem->ptr + c->offset;
        iov[niov].iov_len = (size_t)c_len;
        toSend += c_len;
        ++niov;

        if (niov == MAX_CHUNKS || toSend >= *p_max_bytes)
            break;
    }

    if (0 == niov)
        return network_remove_finished_chunks(cq, 0);

  #ifndef _WIN32
    ssize_t wr = writev(fd, iov, niov);
  #else
    DWORD dw;
    ssize_t wr = WSASend(fd, iov, niov, &dw, 0, NULL, NULL);
    if (0 == wr) wr = (ssize_t)dw; /* success: take the byte count from dw */
  #endif
    return network_write_accounting(fd, cq, p_max_bytes, errh, wr, toSend);
}

#endif /* NETWORK_WRITE_USE_WRITEV */




#if defined(NETWORK_WRITE_USE_SENDFILE)

#if defined(NETWORK_WRITE_USE_LINUX_SENDFILE) \
 || defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
#include <sys/sendfile.h>
#endif

#if defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE) \
 || defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)
#include <sys/uio.h>
#endif

/* next chunk must be FILE_CHUNK. send file data using the sendfile() variant
 * selected at compile time (Linux, Darwin, FreeBSD, or Solaris sendfilev()).
 *
 * At most min(bytes left in chunk, *p_max_bytes) bytes are requested.
 * The file is opened first if the chunk has no fd yet.
 *
 * return values:
 *    0 : the full requested amount was sent and write budget remains
 *   -1 : error (open failure, unexpected errno, or file truncated)
 *   -2 : remote close (EPIPE, ECONNRESET, ENOTCONN)
 *   -3 : try again later (EAGAIN/EINTR, partial send, or budget used up)
 * If sendfile() reports that it is unsupported for this fd (EINVAL, ENOSYS,
 * ENOTSUP, ...), the chunk is written via the mmap or read()+write() helper
 * instead and that helper's result is returned. */
static int network_write_file_chunk_sendfile(const int fd, chunkqueue * const cq, off_t * const p_max_bytes, log_error_st * const errh) {
    chunk * const c = cq->first;
    ssize_t wr;
    off_t offset;
    off_t toSend;
    off_t written = 0;

    offset = c->offset;
    toSend = c->file.length - c->offset;
    if (toSend > *p_max_bytes) toSend = *p_max_bytes;
    if (toSend <= 0) return network_remove_finished_chunks(cq, toSend);

    if (c->file.fd < 0 && 0 != chunkqueue_open_file_chunk(cq, errh)) return -1;

    /* Darwin, FreeBSD, and Solaris variants support iovecs and could
     * be optimized to send more than just the file in a single syscall */

  #if defined(NETWORK_WRITE_USE_LINUX_SENDFILE)

    wr = sendfile(fd, c->file.fd, &offset, toSend);
    if (wr > 0) written = (off_t)wr;

  #elif defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)

    /* in: number of bytes to send; out: number of bytes sent */
    written = toSend;
    wr = sendfile(c->file.fd, fd, offset, &written, NULL, 0);
    /* (for EAGAIN/EINTR written still contains the sent bytes) */

  #elif defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE)

    wr = sendfile(c->file.fd, fd, offset, toSend, NULL, &written, 0);
    /* (for EAGAIN/EINTR written still contains the sent bytes) */

  #elif defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
    {
        sendfilevec_t fvec;
        /* sendfilev() reports the transferred byte count through a size_t*;
         * use a real size_t rather than casting &written (an off_t*), since
         * the two types need not have the same width */
        size_t xferred = 0;
        fvec.sfv_fd = c->file.fd;
        fvec.sfv_flag = 0;
        fvec.sfv_off = offset;
        fvec.sfv_len = toSend;

        /* Solaris sendfilev() */
        wr = sendfilev(fd, &fvec, 1, &xferred);
        /* (for EAGAIN/EINTR xferred still contains the sent bytes) */
        written = (off_t)xferred;
    }
  #else

    wr = -1;
    errno = ENOSYS;

  #endif

    if (-1 == wr) {
        switch(errno) {
          case EAGAIN:
          case EINTR:
            break; /* try again later */
          case EPIPE:
          case ECONNRESET:
          case ENOTCONN:
            return -2;
          case EINVAL:
          case ENOSYS:
         #if defined(ENOTSUP) && (!defined(EOPNOTSUPP) || EOPNOTSUPP != ENOTSUP)
          case ENOTSUP:
         #endif
         #ifdef EOPNOTSUPP
          case EOPNOTSUPP:
         #endif
         #ifdef ESOCKTNOSUPPORT
          case ESOCKTNOSUPPORT:
         #endif
         #ifdef EAFNOSUPPORT
          case EAFNOSUPPORT:
         #endif
            /* sendfile() not usable here; fall back to another write method */
           #ifdef NETWORK_WRITE_USE_MMAP
            return network_write_file_chunk_mmap(fd, cq, p_max_bytes, errh);
           #else
            return network_write_file_chunk_no_mmap(fd, cq, p_max_bytes, errh);
           #endif
          default:
            log_perror(errh, __FILE__, __LINE__, "sendfile(): fd: %d", fd);
            return -1;
        }
    }

    if (written > 0) {
        /* account for (possibly partial) progress, even after EAGAIN/EINTR */
        chunkqueue_mark_written(cq, written);
        *p_max_bytes -= written;
        if (__builtin_expect( (*p_max_bytes <= 0), 0)) return -3;
    }
    else if (0 == wr) { /*(-1 != wr && 0 == written)*/
        log_error(errh, __FILE__, __LINE__,
                  "sendfile(): fd: %d file truncated", fd);
        return -1;
    }

    return (wr >= 0 && written == toSend) ? 0 : -3;
}

#endif




/* return values:
 * >= 0 : no error
 *   -1 : error (on our side)
 *   -2 : remote close
 */

/* Drain cq to fd without sendfile(): memory chunks go through writev()
 * (or plain write() when writev is unavailable), file chunks through the
 * mmap or read()+write() helper.  Loops until the queue is empty or a helper
 * returns non-zero; a helper result of -3 ("try again later") is reported
 * to the caller as 0, any other non-zero result is returned as is. */
static int network_write_chunkqueue_writev(const int fd, chunkqueue * const cq, off_t max_bytes, log_error_st * const errh) {
    for (const chunk *c; NULL != (c = cq->first); ) {
        int rc;

        if (c->type == MEM_CHUNK) {
          #if !defined(NETWORK_WRITE_USE_WRITEV)
            rc = network_write_mem_chunk(fd, cq, &max_bytes, errh);
          #else
            rc = network_writev_mem_chunks(fd, cq, &max_bytes, errh);
          #endif
        }
        else if (c->type == FILE_CHUNK) {
          #ifndef NETWORK_WRITE_USE_MMAP
            rc = network_write_file_chunk_no_mmap(fd, cq, &max_bytes, errh);
          #else
            rc = network_write_file_chunk_mmap(fd, cq, &max_bytes, errh);
          #endif
        }
        else {
            rc = -1; /* unhandled chunk type */
        }

        if (__builtin_expect( (0 != rc), 0))
            return (-3 == rc) ? 0 : rc;
    }

    return 0;
}

#if defined(NETWORK_WRITE_USE_SENDFILE)
/* Drain cq to fd, preferring sendfile() for file chunks; memory chunks go
 * through writev() (or plain write() when writev is unavailable).
 * Loops until the queue is empty or a helper returns non-zero; a helper
 * result of -3 ("try again later") is reported to the caller as 0, any
 * other non-zero result is returned as is. */
static int network_write_chunkqueue_sendfile(const int fd, chunkqueue * const cq, off_t max_bytes, log_error_st * const errh) {
    for (const chunk *c; NULL != (c = cq->first); ) {
        int rc;

        if (c->type == MEM_CHUNK) {
          #if !defined(NETWORK_WRITE_USE_WRITEV)
            rc = network_write_mem_chunk(fd, cq, &max_bytes, errh);
          #else
            rc = network_writev_mem_chunks(fd, cq, &max_bytes, errh);
          #endif
        }
        else if (c->type == FILE_CHUNK) {
          #if defined(NETWORK_WRITE_USE_SENDFILE)
            rc = network_write_file_chunk_sendfile(fd, cq, &max_bytes, errh);
          #elif defined(NETWORK_WRITE_USE_MMAP)
            rc = network_write_file_chunk_mmap(fd, cq, &max_bytes, errh);
          #else
            rc = network_write_file_chunk_no_mmap(fd, cq, &max_bytes, errh);
          #endif
        }
        else {
            rc = -1; /* unhandled chunk type */
        }

        if (__builtin_expect( (0 != rc), 0))
            return (-3 == rc) ? 0 : rc;
    }

    return 0;
}
#endif

/* Select the chunkqueue write function stored in srv->network_backend_write.
 *
 * If srv->srvconf.network_backend is set, its string must match one of the
 * names in the table below; otherwise the first table entry is the default.
 * A sendfile backend is used only when sendfile support was compiled in;
 * in every other case the writev/write implementation is installed.
 *
 * Returns 0 on success, -1 if the configured name is unknown (an error is
 * logged to srv->errh). */
int network_write_init(server *srv) {
    typedef enum {
        NETWORK_BACKEND_UNSET,
        NETWORK_BACKEND_WRITE,
        NETWORK_BACKEND_WRITEV,
        NETWORK_BACKEND_SENDFILE,
    } network_backend_t;

    network_backend_t backend;

    static const struct nb_map {
        network_backend_t nb;
        const char *name;
    } network_backends[] = {
        /* first entry is the default; list is terminated by a NULL name */
        { NETWORK_BACKEND_SENDFILE, "sendfile" },
        { NETWORK_BACKEND_SENDFILE, "linux-sendfile" },
        { NETWORK_BACKEND_SENDFILE, "freebsd-sendfile" },
        /* accepted so that every name listed by network_write_show_handlers()
         * is also a valid configuration value */
        { NETWORK_BACKEND_SENDFILE, "darwin-sendfile" },
        { NETWORK_BACKEND_SENDFILE, "solaris-sendfilev" },
        { NETWORK_BACKEND_WRITEV,   "writev" },
        { NETWORK_BACKEND_WRITE,    "write" },
        { NETWORK_BACKEND_UNSET,    NULL }
    };

    /* get a useful default */
    backend = network_backends[0].nb;

    /* match name against known types */
    if (srv->srvconf.network_backend) {
        const char *name, *confname = srv->srvconf.network_backend->ptr;
        for (size_t i = 0; NULL != (name = network_backends[i].name); ++i) {
            if (0 == strcmp(confname, name)) {
                backend = network_backends[i].nb;
                break;
            }
        }
        if (NULL == name) { /* reached the terminator without a match */
            log_error(srv->errh, __FILE__, __LINE__,
              "server.network-backend has an unknown value: %s", confname);
            return -1;
        }
    }

    switch(backend) {
    case NETWORK_BACKEND_SENDFILE:
      #if defined(NETWORK_WRITE_USE_SENDFILE)
        srv->network_backend_write = network_write_chunkqueue_sendfile;
        break;
      #endif
        /* without compiled-in sendfile support, use the writev/write path */
        /* fallthrough */
    case NETWORK_BACKEND_WRITEV:
    case NETWORK_BACKEND_WRITE:
        srv->network_backend_write = network_write_chunkqueue_writev;
        break;
    default:
        return -1;
    }

    return 0;
}

/* Return a static, human-readable list of the network write handlers known
 * to this file.  Each line is prefixed with '+' when the handler was enabled
 * at compile time and '-' when it was not; "write" is always enabled. */
const char * network_write_show_handlers(void) {
    static const char handlers[] =
      "\nNetwork handler:\n\n"
     #ifdef NETWORK_WRITE_USE_LINUX_SENDFILE
      "\t+ linux-sendfile\n"
     #else
      "\t- linux-sendfile\n"
     #endif
     #ifdef NETWORK_WRITE_USE_FREEBSD_SENDFILE
      "\t+ freebsd-sendfile\n"
     #else
      "\t- freebsd-sendfile\n"
     #endif
     #ifdef NETWORK_WRITE_USE_DARWIN_SENDFILE
      "\t+ darwin-sendfile\n"
     #else
      "\t- darwin-sendfile\n"
     #endif
     #ifdef NETWORK_WRITE_USE_SOLARIS_SENDFILEV
      "\t+ solaris-sendfilev\n"
     #else
      "\t- solaris-sendfilev\n"
     #endif
     #ifdef NETWORK_WRITE_USE_WRITEV
      "\t+ writev\n"
     #else
      "\t- writev\n"
     #endif
      "\t+ write\n"
     #ifdef NETWORK_WRITE_USE_MMAP
      "\t+ mmap support\n"
     #else
      "\t- mmap support\n"
     #endif
      ;

    return handlers;
}